data <- read.csv("/home/bambrozi/2_CORTISOL/Data/T4_Elora_Data_04_25_2024.csv")
# Replace "treated" with NA
data$T4Cortisol[data$T4Cortisol == "treated" | data$T4Cortisol == "Treated at T2" | data$T4Cortisol == "treated at T2"] <- NA
# Convert the column to numeric, coercing non-numeric values to NA
data$T4Cortisol <- as.numeric(as.character(data$T4Cortisol))
#Filtering only the lines with values
data <- data[!is.na(data$T4Cortisol),]
#creating new data file cleaned
write.csv(data, "/home/bambrozi/2_CORTISOL/Data/data_clean.csv", row.names = F)
print(data)
# Summary Statistics
summary(data$T4Cortisol)
# Histogram
hist(data$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")
# Boxplot
boxplot(data$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")
# Density Plot
plot(density(data$T4Cortisol), main = "Density Plot of T4 Cortisol", xlab = "T4 Cortisol", ylab = "Density")
# Calculate the theoretical quantiles
qqnorm(data$T4Cortisol, main = "QQ Plot of T4Cortisol", xlim = c(min(qqnorm(data$T4Cortisol)$x), max(qqnorm(data$T4Cortisol)$x)), ylim = c(min(qqnorm(data$T4Cortisol)$y), max(qqnorm(data$T4Cortisol)$y) + 2 * IQR(qqnorm(data$T4Cortisol)$y)))
# Add the QQ line
qqline(data$T4Cortisol, col = "red")
Summary statistics
Histogram
Density
Box_Plot
qq_Plot
Shapiro-Wilk normality test
I received from Umesh a e-mail informing the three categories that the animals could be sorted based on their cortisol concentration.
data$Categorical <- ifelse(data$T4Cortisol >= 956, "H",
ifelse(data$T4Cortisol <= 190.8, "L", "M"))
table(data$Categorical)
library(ggplot2)
# Reorder the levels of the 'Categorical' column
data$Categorical <- factor(data$Categorical, levels = c("L", "M", "H"))
# Create the histogram with reordered categories
ggplot(data, aes(x = Categorical, fill = Categorical)) +
geom_bar() +
labs(title = "Histogram of T4Cortisol by Category",
x = "Category",
y = "Frequency") +
theme_minimal()
# Create the histogram
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
geom_histogram(binwidth = 50, color = "black", alpha = 0.7) + # Adjust binwidth as needed
labs(title = "Histogram of T4Cortisol with Color by Category",
x = "T4 Cortisol",
y = "Frequency",
fill = "Category") +
scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
theme_minimal()
# Create the density plot
ggplot(data, aes(x = T4Cortisol, fill = Categorical)) +
geom_density(alpha = 0.3) +
labs(title = "Density Plot of T4Cortisol with Color by Category",
x = "T4Cortisol",
y = "Density",
fill = "Category") +
scale_fill_manual(values = c("H" = "red", "M" = "blue", "L" = "green")) + # Adjust colors if needed
theme_minimal()
# Create a density plot
ggplot(data, aes(x = T4Cortisol)) +
geom_density() +
geom_vline(xintercept = c(956, 190.8), linetype = "dashed", color = "red") +
labs(title = "Density Plot of T4Cortisol with Vertical Lines",
x = "T4Cortisol",
y = "Density") +
theme_minimal()
The animals were sorted in these three categories >H = Hight >M = Medium >L = Low
The individuals frequency distribution in theese categories are shown in the plots below
Observing the previous plots I tried to remove the “outliers” phenotypes above 1250, but the outcome from Shapiro test is still indicating no normality of the data.
library(tidyverse)
data_no_out <- filter(data, T4Cortisol < 1250)
# Create QQ plot
qqnorm(data_no_out$T4Cortisol, main = "QQ Plot of T4Cortisol", xlab = "Theoretical Quantiles", ylab = "Sample Quantiles")
qqline(data$T4Cortisol, col = "red")
boxplot(data_no_out$T4Cortisol, main = "Boxplot of T4 Cortisol", ylab = "T4 Cortisol")
hist(data_no_out$T4Cortisol, breaks = 20, main = "Histogram of T4 Cortisol", xlab = "T4 Cortisol")
shapiro.test(data$T4Cortisol)
Lucas Alcântara sent me the path to the genotype and pedigree files: /data/cgil/daiclu/6_Data/database/public_output/bruno
In this folder we found the following files:
I made a copy of this files in a folder called Raw_files:
/home/bambrozi/2_CORTISOL/RawFiles
This directory has two sub-directories:To perfome this I used two codes
sed -i '1i id Call' genotypes.txt
filename = 'bruno_gntps.txt'
outputFileOpen = open('genoplink.ped','w')
recode = {'0':['C','C'] , '1':['A','C'] , '2':['A','A'] , '5':['0','0'] }
for line in open(filename,'r'):
if 'Call' in line : continue
ids, call = line.strip().split()
genotypes = [ recode[geno012][0] +' '+ recode[geno012][1] for geno012 in call ]
outputFileOpen.write("%s %s %s %s %s %s %s\n" % ('HO', ids, '0','0','0','-9',' '.join(genotypes)) )
Now I have a folder called /home/bambrozi/2_CORTISOL/Geno_files with the file named genoplink.ped
library(data.table)
pheno <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")
ped <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")
geno <- fread("/home/bambrozi/2_CORTISOL/Geno_files/genoplink.ped")
geno <- geno[,c("V2")]
#Bringing cdn_id to my phenotype file
#Generate a index with the match
matching_indices <- match(pheno$ID, ped$elora_id)
# Then, assign 'cdn_id' from 'ped' to 'pheno' where there are matches
pheno$cdn_id <- ifelse(!is.na(matching_indices), ped$cdn_id[matching_indices], NA)
#Making a phenotype file only with genotyped animals
pheno_genotyped <- pheno[pheno$cdn_id %in% geno$V2,]
#check if all animals in this file are genotyped
checkk <- pheno_genotyped$cdn_id %in% geno$V2
sum(checkk)
The phenotype file should have three columns: FID, Animal_id, Phenotype
HO <- rep("HO", 252)
pheno_gwas <- as.data.frame(cbind(HO, pheno_genotyped$cdn_id, pheno_genotyped$T4Cortisol))
colnames(pheno_gwas) <- c("FID", "cdn_id", "cortisol")
pheno_gwas$cdn_id <- as.numeric(pheno_gwas$cdn_id)
pheno_gwas$cortisol <- round(as.numeric(pheno_gwas$cortisol),2)
write.table(pheno_gwas, "/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", quote = F, row.names = F, col.names = T)
map <- fread("/data/cgil/daiclu/6_Data/database/public_output/bruno/DGVsnpinfo.2404.ho")
morgan <- data.frame(X0 = rep(0, 45101))
mapa=as.data.frame(cbind(map$chr, map$snp_name, morgan$X0, map$location))
head(mapa)
write.table(x = mapa, file = "/home/bambrozi/2_CORTISOL/Geno_files/genoplink.map", row.names = FALSE, col.names = FALSE, sep = "\t", quote = FALSE)
system("/home/local/bin/plink --cow --nonfounders --allow-no-sex --keep-allele-order --file /home/bambrozi/2_CORTISOL/Geno_files/genoplink --make-bed --out /home/bambrozi/2_CORTISOL/Geno_files/genoplink")
With the code above I generated the bfiles:
We ran the code below to perfom the QC
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/Geno_files/genoplink
RESULT=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
/home/local/bin/plink \
--bfile ${DATA} \
--cow \
--allow-no-sex \
--hwe 1e-5 \
--maf 0.01 \
--geno 0.1 \
--mind 0.1 \
--keep-allele-order \
--make-bed \
--out ${RESULT}
The server screen outcome is shown below.
After the Quality Control we ended up with
To check for duplicated individuals I performed the KINSHIP analysis using one script from Larissa Braga. Running the King Analysis on Plink.
#!/bin/bash
DATA=/home/bambrozi/Extrm_ARS1_GrassHill_1/GENOTYPES/ONLY_GRASSHILL_AND_PHENO_after_QC/only_grasshill_and_pheno_clean
RESULT=/home/bambrozi/Extrm_ARS1_GrassHill_1/GENOTYPES/KING/result_king
plink2 \
--bfile ${DATA} \
--chr-set 29 \
--make-king-table \
--out ${RESULT}
This is the output screen on terminal:
The table below is the output /home/bambrozi/2_CORTISOL/Geno_files_after_KING/result_king.kin0 and have pairwise comparisons between all individuals.
Now we should open in R and check for individuals with more than 0,354, to perform this we can use the code below, also provided by Larissa Braga:
setwd("/home/bambrozi/2_CORTISOL/Geno_files_after_KING")
relatedness="result_king.kin0" ## change accordingly!!
library(data.table)
print(relatedness)
rel=fread(relatedness, h = T)
head(rel)
print("Individuals with different identifications above the cut off of 0.354:")
dup=subset(rel, KINSHIP >= 0.354 & IID1!=IID2)
print(dup)
nrow(dup)
So the code above will provide this output if there is any duplicated individual.
We do not have any duplicated individual
So the file to be used are those in the directory /home/bambrozi/2_CORTISOL/Geno_files_after_QC
files:genoplink_clean
After Quality Control we didn’t lost any animal, so we don’t need to update our phenotype file
Now before performing the PCA analysis we need to change the FID for those individuals that has phenotype = 1 for Nadia.
#!/bin/bash
DATA=/home/bambrozi/Extrm_ARS1_GrassHill_1/PCA/imput_pca
RESULT=/home/bambrozi/Extrm_ARS1_GrassHill_1/PCA/pca_result
plink \
--bfile ${DATA} \
--keep-allele-order \
--chr-set 29 \
--pca \
--out ${RESULT}
The PCA output:
After generate the Eigenvalues and Eigenvectors I used the code below to generate the PCA Plot
setwd("/home/bambrozi/2_CORTISOL/PCA")
library(ggplot2)
library(tidyverse)
##
# Visualize PCA results
###
# read in result files
eigenValues <- read_delim("pca_result.eigenval", delim = " ", col_names = F)
eigenVectors <- read_delim("pca_result.eigenvec", delim = " ", col_names = F)
## Proportion of variation captured by each vector
eigen_percent <- round((eigenValues / (sum(eigenValues))*100), 2)
# PCA plot
png("pca-plink.eng.png", width=5, height=5, units="in", res=300)
ggplot(data = eigenVectors) +
geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 1, alpha = 1) +
geom_hline(yintercept = 0, linetype="dotted") +
geom_vline(xintercept = 0, linetype="dotted") +
labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
theme_minimal()
dev.off()
# PCA plot with animal ids
png("pca-plink.eng.animals_id.png", width=50, height=50, units="in", res=300)
ggplot(data = eigenVectors) +
geom_point(mapping = aes(x = X3, y = X4), color = "red", shape = 19, size = 5, alpha = 1) +
geom_text(mapping = aes(x = X3, y = X4, label = X2), size = 2, hjust = 0, vjust = 0) + # Add labels for animal IDs
geom_hline(yintercept = 0, linetype="dotted") +
geom_vline(xintercept = 0, linetype="dotted") +
labs(x = paste0("Principal component 1 (", eigen_percent[1,1], " %)"),
y = paste0("Principal component 2 (", eigen_percent[2,1], " %)")) +
theme_minimal()
dev.off()
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/GWAS/GWAS_result
PHENO=/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt
/home/local/bin/gcta \
--bfile ${DATA} \
--mlma-loco \
--pheno ${PHENO} \
--autosome-num 29 \
--autosome \
--out ${RESULT}
This is te output from the code above:
gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_result.loco.mlma",
head=T, stringsAsFactors = F)
gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}
bonf<- -log10(0.05/nrow(gwas))
library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20,
cex=1, lwd=1, ylab="", xlab="Chromossomes",
main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topleft", col="red2", lwd=2, c("Bonferroni correction"), bty="n")
The code above creates a Manhattam Plot, the correction for multiple test is the Bonferroni correction
The code below will save information of Significant SNP.
library(tidyverse)
SNP_sig_bonf <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
Below we can see the 1 significant SNP for Bonferroni correction.
gwas$bh <- p.adjust(gwas$p, method = "BH") #FDR
The code below will create a file with the significant snp for FDR-BH
SNP_sig_BH <- filter(gwas, bh < 0.05)
write.table(SNP_sig_BH, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_BH.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
As outcome we can see that the FDR-BH method didn’t increase the number of significant SNP.
Now we are going to apply a correction for multiple testing modifying the Bonferroni test adjusting not for the total number of SNPs but for the number of the independent segments in the genome.
Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")
library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
summarise(total_length = sum(`Seq length`)) %>%
pull(total_length)
# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8
# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)
NeL <- Ne*L_M
# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)
gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_result.loco.mlma",
head=T, stringsAsFactors = F)
gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}
bonf<- -log10(0.05/Me)
library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20,
cex=1, lwd=1, ylab="", xlab="Chromossomes",
main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topleft", col="red2", lwd=2, c("Bonferroni corr. for ind. segments"), bty="n")
library(tidyverse)
SNP_sig_bonf_ind_seg <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf_ind_seg, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
Below we can find the list of significant SNPs after the correction for Multiple Testing
I performed the qqPlot analysis using the code below:
gwas<- read.table("GWAS_result.loco.mlma", h=T)
ps<- gwas$p
inflation <- function(ps) {
chisq <- qchisq(1 - ps, 1)
lambda <- median(chisq) / qchisq(0.5, 1)
lambda
}
# Calculating the lambda - the lambda statistic should be close to 1 if
#the points fall within the expected range, or greater than one if
# the observed p-values are more significant than expected.
inflation(ps)
bonf<- -log10(0.05/nrow(gwas))
gwas$log<- -log10(gwas$p)
gg_qqplot <- function(ps, ci = 0.95) {
n <- length(ps)
df <- data.frame(
observed = -log10(sort(ps)),
expected = -log10(ppoints(n)),
clower = -log10(qbeta(p = (1 - ci) / 2, shape1 = 1:n, shape2 = n:1)),
cupper = -log10(qbeta(p = (1 + ci) / 2, shape1 = 1:n, shape2 = n:1))
)
log10Pe <- expression(paste("Expected -log"[10], plain(P)))
log10Po <- expression(paste("Observed -log"[10], plain(P)))
ggplot(df) +
geom_ribbon(
mapping = aes(x = expected, ymin = clower, ymax = cupper),
alpha = 0.1
) +
geom_point(aes(expected, observed), shape = 1, size = 3) +
geom_abline(intercept = 0, slope = 1, alpha = 0.5) +
# geom_line(aes(expected, cupper), linetype = 2, size = 0.5) +
# geom_line(aes(expected, clower), linetype = 2, size = 0.5) +
xlab(log10Pe) +
ylab(log10Po)
}
## plot -----
gg_qqplot(ps) +
theme_bw(base_size = 24) +
annotate(
geom = "text",
x = -Inf,
y = Inf,
hjust = -0.15,
vjust = 1 + 0.15 * 3,
label = sprintf("λ = %.2f", inflation(ps)),
size = 8
) +
theme(
axis.ticks = element_line(size = 0.5),
panel.grid = element_blank()
# panel.grid = element_line(size = 0.5, color = "grey80")
)
The outcome is the qqplot below, and the lambda value is \(\lambda\) = 1.03
After insert all rsID on VEP the summary is shown below:
Bellow I’m gonna show the “genome view” for each SNP (variant) recovered from VEP:
rs42217767 ps.
this variant wasn’t recovered automaticaly by VEP, so I typed the chr
and position on the search area of the “genome viewer”
rs110031217
rs110991998
rs42089058
rs41644634
rs41567074
ps. the unique reference genome available in the VEP is the ARS-UCD1.3, which is not a problem once it is working with the rsID, which for sure is our variant.
# GALLO
#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")
#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")
#import MARKER files = the GWAS output
gwas <- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg.txt",
head=T, stringsAsFactors = F)
# Assuming "gwas" is your dataframe
gwas <- subset(gwas, select = c(Chr, SNP, bp))
colnames(gwas) <- c("CHR","SNP", "BP")
#FINDING GENES AND QTLs ARROUND THE MARKER
#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2,
marker_file= gwas,
method = "gene",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/out_genes_50k.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/out_genes_50k.csv")
#FINDING QTLs
out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2,
marker_file= gwas,
method = "qtl",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/out_qtl_50k.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/out_qtl_50k_clean.csv")
Dowloading the .gtf file from Ensembl https://useast.ensembl.org/info/data/ftp/index.html
Downloading the .gff file from AnimalQTLdb https://www.animalgenome.org/cgi-bin/QTLdb/index
The GALLO output are bellow:
For GENES
| X | CHR | SNP | BP | chr | start_pos | end_pos | width | strand | gene_id | gene_name | gene_biotype |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 11 | BTB-01060598 | 32301594 | 11 | 32278324 | 32766620 | 488297 | - | ENSBTAG00000046199 | NRXN1 | protein_coding |
| 2 | 11 | BTB-01060598 | 32301594 | 11 | 32280416 | 32280512 | 97 | - | ENSBTAG00000044573 | U6 | snRNA |
| 3 | 22 | ARS-BFGL-NGS-26419 | 46052082 | 22 | 45924535 | 46818511 | 893977 | - | ENSBTAG00000013117 | CACNA2D3 | protein_coding |
| 4 | 22 | ARS-BFGL-NGS-26419 | 46052082 | 22 | 46051998 | 46062657 | 10660 | + | ENSBTAG00000013124 | LRTM1 | protein_coding |
| 5 | 22 | ARS-BFGL-NGS-23411 | 5134078 | 22 | 5091037 | 5184321 | 93285 | + | ENSBTAG00000019832 | TGFBR2 | protein_coding |
| 6 | 26 | BTB-00926636 | 17857480 | 26 | 17703169 | 17846407 | 143239 | - | ENSBTAG00000011743 | TLL2 | protein_coding |
| 7 | 26 | BTB-00926636 | 17857480 | 26 | 17851539 | 17912665 | 61127 | - | ENSBTAG00000016298 | TM9SF3 | protein_coding |
| 8 | 26 | BTA-62125-no-rs | 17899619 | 26 | 17851539 | 17912665 | 61127 | - | ENSBTAG00000016298 | TM9SF3 | protein_coding |
| 9 | 26 | BTA-62125-no-rs | 17899619 | 26 | 17925643 | 18029878 | 104236 | - | ENSBTAG00000019872 | PIK3AP1 | protein_coding |
| 10 | 28 | BTA-99379-no-rs | 41666186 | 28 | 41609015 | 41649016 | 40002 | - | ENSBTAG00000007540 | GLUD1 | protein_coding |
| 11 | 28 | BTA-99379-no-rs | 41666186 | 28 | 41650333 | 41763371 | 113039 | + | ENSBTAG00000019194 | SHLD2 | protein_coding |
FOR QTLs
| X | CHR | SNP | BP | QTL_type | start_pos | end_pos | QTL_ID |
|---|---|---|---|---|---|---|---|
| 1 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5109498 | 5109502 | 151609 |
| 2 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5134076 | 5134080 | 151620 |
| 3 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5173197 | 5173201 | 151601 |
| 4 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51802 |
| 5 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51803 |
| 6 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51804 |
| 7 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51805 |
| 8 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Production | 46052080 | 46052084 | 51806 |
| 9 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Production | 46052080 | 46052084 | 51807 |
| 10 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51808 |
| 11 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51809 |
| 12 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Health | 46052080 | 46052084 | 51810 |
| 13 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51811 |
| 14 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51812 |
| 15 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51813 |
| 16 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Milk | 46073517 | 46073521 | 243365 |
| 17 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 195264 |
| 18 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 200269 |
| 19 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 200270 |
| 20 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 205295 |
| 21 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 198859 |
| 22 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 203868 |
| 23 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 203869 |
| 24 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 210297 |
| 25 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 198858 |
| 26 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 203866 |
| 27 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 203867 |
| 28 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 210296 |
| 29 | 26 | BTB-00926636 | 17857480 | Milk | 17811725 | 17811729 | 197493 |
| 30 | 26 | BTB-00926636 | 17857480 | Milk | 17811725 | 17811729 | 202704 |
| 31 | 26 | BTB-00926636 | 17857480 | Milk | 17812273 | 17812277 | 195685 |
| 32 | 26 | BTB-00926636 | 17857480 | Milk | 17812273 | 17812277 | 200785 |
| 33 | 26 | BTB-00926636 | 17857480 | Milk | 17813212 | 17813216 | 195285 |
| 34 | 26 | BTB-00926636 | 17857480 | Milk | 17813212 | 17813216 | 200301 |
| 35 | 26 | BTB-00926636 | 17857480 | Milk | 17813978 | 17813982 | 196236 |
| 36 | 26 | BTB-00926636 | 17857480 | Milk | 17813978 | 17813982 | 201468 |
| 37 | 26 | BTB-00926636 | 17857480 | Milk | 17815237 | 17815241 | 198002 |
| 38 | 26 | BTB-00926636 | 17857480 | Milk | 17815237 | 17815241 | 203190 |
| 39 | 26 | BTB-00926636 | 17857480 | Milk | 17817223 | 17817227 | 34750 |
| 40 | 26 | BTB-00926636 | 17857480 | Milk | 17817223 | 17817227 | 195586 |
| 41 | 26 | BTB-00926636 | 17857480 | Milk | 17821297 | 17821301 | 197235 |
| 42 | 26 | BTB-00926636 | 17857480 | Milk | 17821297 | 17821301 | 202455 |
| 43 | 26 | BTB-00926636 | 17857480 | Milk | 17822209 | 17822213 | 197783 |
| 44 | 26 | BTB-00926636 | 17857480 | Milk | 17822209 | 17822213 | 202983 |
| 45 | 26 | BTB-00926636 | 17857480 | Milk | 17823105 | 17823109 | 196204 |
| 46 | 26 | BTB-00926636 | 17857480 | Milk | 17823105 | 17823109 | 201419 |
| 47 | 26 | BTB-00926636 | 17857480 | Milk | 17824770 | 17824774 | 196867 |
| 48 | 26 | BTB-00926636 | 17857480 | Milk | 17824770 | 17824774 | 202101 |
| 49 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 195748 |
| 50 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 200855 |
| 51 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 206213 |
| 52 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 197144 |
| 53 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 197144 |
| 54 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 202367 |
| 55 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 202367 |
| 56 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 208485 |
| 57 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 208485 |
| 58 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 197611 |
| 59 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 197611 |
| 60 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 202816 |
| 61 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 202816 |
| 62 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 209076 |
| 63 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 209076 |
| 64 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 63656 |
| 65 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 63656 |
| 66 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 199068 |
| 67 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 199068 |
| 68 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 204113 |
| 69 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 204113 |
| 70 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 204114 |
| 71 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 204114 |
| 72 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 210399 |
| 73 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 210399 |
| 74 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 63657 |
| 75 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 63657 |
| 76 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 199069 |
| 77 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 199069 |
| 78 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 204115 |
| 79 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 204115 |
| 80 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 204116 |
| 81 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 204116 |
| 82 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 210400 |
| 83 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 210400 |
| 84 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 199070 |
| 85 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 199070 |
| 86 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 204117 |
| 87 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 204117 |
| 88 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 204118 |
| 89 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 204118 |
| 90 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 210401 |
| 91 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 210401 |
| 92 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 199071 |
| 93 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 199071 |
| 94 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 204119 |
| 95 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 204119 |
| 96 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 204120 |
| 97 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 204120 |
| 98 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 210402 |
| 99 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 210402 |
| 100 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 63658 |
| 101 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 63658 |
| 102 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 199072 |
| 103 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 199072 |
| 104 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 204121 |
| 105 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 204121 |
| 106 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 204122 |
| 107 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 204122 |
| 108 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 210403 |
| 109 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 210403 |
| 110 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 63660 |
| 111 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 63660 |
| 112 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 199074 |
| 113 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 199074 |
| 114 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 204125 |
| 115 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 204125 |
| 116 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 204126 |
| 117 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 204126 |
| 118 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 210405 |
| 119 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 210405 |
| 120 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 63654 |
| 121 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 199076 |
| 122 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 204129 |
| 123 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 204130 |
| 124 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 210407 |
| 125 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 63655 |
| 126 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 197826 |
| 127 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 203026 |
| 128 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 203027 |
| 129 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 209354 |
| 130 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 195860 |
| 131 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 201008 |
| 132 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 201009 |
| 133 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 206462 |
| 134 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 33984 |
| 135 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195668 |
| 136 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195669 |
| 137 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195670 |
| 138 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 200758 |
| 139 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17931209 | 17931213 | 199092 |
| 140 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17931209 | 17931213 | 204151 |
| 141 | 28 | BTA-99379-no-rs | 41666186 | Reproduction | 41646434 | 41646438 | 107048 |
| 142 | 28 | BTA-99379-no-rs | 41666186 | Reproduction | 41646434 | 41646438 | 107227 |
QTL type
#GALLO
par(mar=c(8,20,8,8))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1)
QTL type
#GALLO
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Reproduction")
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Health")
Reproduction
Health
#GALLO
#QTL enrichment analysis
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "Name",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]
write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/out_enrich_qtl_genome_name.csv")
out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "QTL_type",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/out_enrich_qtl_genome_type.csv")
#Plots
#Name
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")
#Type
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")
QTL Enrichment outcomes
Enrichment by name (enrichment analysis will be performed for each trait individually)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval | QTL_type |
|---|---|---|---|---|---|---|---|---|
| 5 | Milk C14 index | 35 | 4437 | 108 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 9 | Milk myristoleic acid content | 26 | 3047 | 108 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 10 | Milk palmitoleic acid content | 15 | 2422 | 108 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 6 | Milk C16 index | 12 | 2002 | 108 | 163224 | 0.0000000 | 0.0000001 | Milk |
| 12 | Muscle carnosine content | 3 | 67 | 108 | 163224 | 0.0000131 | 0.0000497 | Meat and Carcass |
| 14 | Pregnancy rate | 2 | 944 | 108 | 163224 | 0.1296627 | 0.4105985 | Reproduction |
| 18 | Teat length | 1 | 300 | 108 | 163224 | 0.1802436 | 0.4892327 | Exterior |
| 15 | Rear leg placement - side view | 1 | 430 | 108 | 163224 | 0.2479752 | 0.5235032 | Exterior |
| 17 | Stillbirth | 2 | 1363 | 108 | 163224 | 0.2280294 | 0.5235032 | Reproduction |
| 3 | Foot angle | 1 | 672 | 108 | 163224 | 0.3596272 | 0.6162877 | Exterior |
| 7 | Milk capric acid content | 1 | 912 | 108 | 163224 | 0.4541067 | 0.6162877 | Milk |
| 8 | Milk myristic acid content | 1 | 902 | 108 | 163224 | 0.4504612 | 0.6162877 | Milk |
| 13 | Net merit | 1 | 903 | 108 | 163224 | 0.4508269 | 0.6162877 | Production |
| 19 | Udder depth | 1 | 695 | 108 | 163224 | 0.3693423 | 0.6162877 | Exterior |
| 16 | Somatic cell score | 1 | 1122 | 108 | 163224 | 0.5253603 | 0.6654564 | Health |
| 2 | Conception rate | 1 | 1255 | 108 | 163224 | 0.5656375 | 0.6716945 | Reproduction |
| 1 | Calving ease | 2 | 3819 | 108 | 163224 | 0.7219243 | 0.7776744 | Reproduction |
| 4 | Length of productive life | 1 | 2004 | 108 | 163224 | 0.7367441 | 0.7776744 | Production |
| 11 | Milk yield | 1 | 6432 | 108 | 163224 | 0.9870080 | 0.9870080 | Milk |
Enrichment by QTL_type (enrichment processes performed for the QTL classes)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval |
|---|---|---|---|---|---|---|---|
| 1 | Exterior | 4 | 9077 | 108 | 163224 | 0.8569775 | 0.9999953 |
| 2 | Health | 1 | 5889 | 108 | 163224 | 0.9811250 | 0.9999953 |
| 3 | Meat and Carcass | 3 | 18258 | 108 | 163224 | 0.9997109 | 0.9999953 |
| 4 | Milk | 91 | 75352 | 108 | 163224 | 0.0000000 | 0.0000000 |
| 5 | Production | 2 | 19640 | 108 | 163224 | 0.9999848 | 0.9999953 |
| 6 | Reproduction | 7 | 35008 | 108 | 163224 | 0.9999953 | 0.9999953 |
online version: https://biit.cs.ut.ee/gprofiler/gost
I got a script from Julia Rodrigues about the R package GPROFILER to perform an enrichment of my Genes.
### enriquecimento genico
#install.packages("gprofiler2")
library(gprofiler2)
#Para conferir a lista de organism -> https://biit.cs.ut.ee/gprofiler/page/organism-list
#Obs: eu entro com os ids ENSOAR...
query <- read.table ("/home/bambrozi/2_CORTISOL/GALLO/out_genes_50k.txt", header = T)
query <- query[,c("gene_id")]
gene_enrich <- gost(
query,
organism = "btaurus",
ordered_query = FALSE,
multi_query = FALSE,
significant = TRUE,
exclude_iea = FALSE,
measure_underrepresentation = FALSE,
evcodes = TRUE,
user_threshold = 0.05,
correction_method = c("fdr"),
domain_scope = c("annotated"),
numeric_ns = "",
sources = NULL,
as_short_link = FALSE,
highlight = FALSE
)
#str(gene_enrich) # para ver o formato dos meus dados
#selecionando apenas as informações da lista que me interessam para fazer meu data.frame
result_enrich <- data.frame(gene_enrich$result)
result_enrich <- data.frame(Category = result_enrich$source,
ID = result_enrich$term_id,
Term = result_enrich$term_name,
adj_pvalue = result_enrich$p_value,
id_ensembl = result_enrich$intersection)
write.table(result_enrich,"/home/bambrozi/2_CORTISOL/GPROFILER/gene_enrich.txt", col.names=TRUE, row.names=FALSE, sep="\t", quote=F)
write.csv(result_enrich,"/home/bambrozi/2_CORTISOL/GPROFILER/gene_enrich.csv")
Below we can se the significant terms of this enrichment (output):
| X | Category | ID | Term | adj_pvalue | id_ensembl |
|---|---|---|---|---|---|
| 1 | GO:BP | GO:0006538 | glutamate catabolic process | 0.0383792 | ENSBTAG00000007540 |
| 2 | GO:BP | GO:0003274 | endocardial cushion fusion | 0.0383792 | ENSBTAG00000019832 |
| 3 | GO:BP | GO:1905005 | regulation of epithelial to mesenchymal transition involved in endocardial cushion formation | 0.0383792 | ENSBTAG00000019832 |
| 4 | GO:BP | GO:1905007 | positive regulation of epithelial to mesenchymal transition involved in endocardial cushion formation | 0.0383792 | ENSBTAG00000019832 |
| 5 | GO:BP | GO:0003430 | growth plate cartilage chondrocyte growth | 0.0383792 | ENSBTAG00000019832 |
| 6 | GO:BP | GO:0003431 | growth plate cartilage chondrocyte development | 0.0383792 | ENSBTAG00000019832 |
| 7 | GO:BP | GO:0003433 | chondrocyte development involved in endochondral bone morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 8 | GO:BP | GO:0003415 | chondrocyte hypertrophy | 0.0383792 | ENSBTAG00000019832 |
| 9 | GO:BP | GO:0048631 | regulation of skeletal muscle tissue growth | 0.0383792 | ENSBTAG00000011743 |
| 10 | GO:BP | GO:0051138 | positive regulation of NK T cell differentiation | 0.0383792 | ENSBTAG00000019832 |
| 11 | GO:BP | GO:0062043 | positive regulation of cardiac epithelial to mesenchymal transition | 0.0383792 | ENSBTAG00000019832 |
| 12 | GO:BP | GO:0051136 | regulation of NK T cell differentiation | 0.0383792 | ENSBTAG00000019832 |
| 13 | GO:BP | GO:0003186 | tricuspid valve morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 14 | GO:BP | GO:0062042 | regulation of cardiac epithelial to mesenchymal transition | 0.0383792 | ENSBTAG00000019832 |
| 15 | GO:BP | GO:0060434 | bronchus morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 16 | GO:BP | GO:0060440 | trachea formation | 0.0383792 | ENSBTAG00000019832 |
| 17 | GO:BP | GO:0060462 | lung lobe development | 0.0383792 | ENSBTAG00000019832 |
| 18 | GO:BP | GO:0060463 | lung lobe morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 19 | GO:BP | GO:0048632 | negative regulation of skeletal muscle tissue growth | 0.0383792 | ENSBTAG00000011743 |
| 20 | GO:BP | GO:0003175 | tricuspid valve development | 0.0383792 | ENSBTAG00000019832 |
| 21 | GO:BP | GO:0061520 | Langerhans cell differentiation | 0.0383792 | ENSBTAG00000019832 |
| 22 | GO:BP | GO:1905317 | inferior endocardial cushion morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 23 | GO:BP | GO:2000563 | positive regulation of CD4-positive, alpha-beta T cell proliferation | 0.0383792 | ENSBTAG00000019832 |
| 24 | GO:BP | GO:1990428 | miRNA transport | 0.0383792 | ENSBTAG00000019832 |
| 25 | GO:BP | GO:0002513 | tolerance induction to self antigen | 0.0383792 | ENSBTAG00000019832 |
| 26 | GO:BP | GO:0002514 | B cell tolerance induction | 0.0383792 | ENSBTAG00000019832 |
| 27 | GO:BP | GO:0003149 | membranous septum morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 28 | GO:BP | GO:1990086 | lens fiber cell apoptotic process | 0.0383792 | ENSBTAG00000019832 |
| 29 | GO:BP | GO:0002520 | immune system development | 0.0383792 | ENSBTAG00000019832,ENSBTAG00000019194 |
| 30 | GO:BP | GO:0002666 | positive regulation of T cell tolerance induction | 0.0383792 | ENSBTAG00000019832 |
| 31 | GO:BP | GO:0002651 | positive regulation of tolerance induction to self antigen | 0.0383792 | ENSBTAG00000019832 |
| 32 | GO:BP | GO:0061343 | cell adhesion involved in heart morphogenesis | 0.0383792 | ENSBTAG00000019832 |
| 33 | GO:BP | GO:0002649 | regulation of tolerance induction to self antigen | 0.0383792 | ENSBTAG00000019832 |
| 34 | GO:BP | GO:0002661 | regulation of B cell tolerance induction | 0.0383792 | ENSBTAG00000019832 |
| 35 | GO:BP | GO:0002664 | regulation of T cell tolerance induction | 0.0383792 | ENSBTAG00000019832 |
| 36 | GO:BP | GO:0002663 | positive regulation of B cell tolerance induction | 0.0383792 | ENSBTAG00000019832 |
| 37 | GO:BP | GO:0002684 | positive regulation of immune system process | 0.0428818 | ENSBTAG00000019832,ENSBTAG00000019872,ENSBTAG00000019194 |
| 38 | GO:BP | GO:0048630 | skeletal muscle tissue growth | 0.0431888 | ENSBTAG00000011743 |
| 39 | GO:BP | GO:0051251 | positive regulation of lymphocyte activation | 0.0431888 | ENSBTAG00000019832,ENSBTAG00000019194 |
| 40 | GO:BP | GO:0034154 | toll-like receptor 7 signaling pathway | 0.0431888 | ENSBTAG00000019872 |
| 41 | GO:BP | GO:0060439 | trachea morphogenesis | 0.0431888 | ENSBTAG00000019832 |
| 42 | GO:BP | GO:0002517 | T cell tolerance induction | 0.0431888 | ENSBTAG00000019832 |
| 43 | GO:BP | GO:0002645 | positive regulation of tolerance induction | 0.0431888 | ENSBTAG00000019832 |
| 44 | GO:BP | GO:0060433 | bronchus development | 0.0460331 | ENSBTAG00000019832 |
| 45 | GO:BP | GO:0003418 | growth plate cartilage chondrocyte differentiation | 0.0460331 | ENSBTAG00000019832 |
| 46 | GO:BP | GO:0002696 | positive regulation of leukocyte activation | 0.0482941 | ENSBTAG00000019832,ENSBTAG00000019194 |
| 47 | GO:BP | GO:0001865 | NK T cell differentiation | 0.0489635 | ENSBTAG00000019832 |
| 48 | GO:BP | GO:0050867 | positive regulation of cell activation | 0.0496277 | ENSBTAG00000019832,ENSBTAG00000019194 |
| 49 | GO:BP | GO:0003198 | epithelial to mesenchymal transition involved in endocardial cushion formation | 0.0496277 | ENSBTAG00000019832 |
| 50 | GO:BP | GO:0002643 | regulation of tolerance induction | 0.0496277 | ENSBTAG00000019832 |
| 51 | GO:BP | GO:2001034 | positive regulation of double-strand break repair via nonhomologous end joining | 0.0496277 | ENSBTAG00000019194 |
| 52 | GO:MF | GO:0004352 | glutamate dehydrogenase (NAD+) activity | 0.0098128 | ENSBTAG00000007540 |
| 53 | GO:MF | GO:0004353 | glutamate dehydrogenase [NAD(P)+] activity | 0.0098128 | ENSBTAG00000007540 |
| 54 | GO:MF | GO:0016639 | oxidoreductase activity, acting on the CH-NH2 group of donors, NAD or NADP as acceptor | 0.0098128 | ENSBTAG00000007540 |
| 55 | GO:MF | GO:0005026 | transforming growth factor beta receptor activity, type II | 0.0147171 | ENSBTAG00000019832 |
| 56 | GO:MF | GO:0048185 | activin binding | 0.0326677 | ENSBTAG00000019832 |
| 57 | GO:MF | GO:0036312 | phosphatidylinositol 3-kinase regulatory subunit binding | 0.0326677 | ENSBTAG00000019872 |
| 58 | GO:MF | GO:0034713 | type I transforming growth factor beta receptor binding | 0.0326677 | ENSBTAG00000019832 |
| 59 | GO:MF | GO:0017002 | activin receptor activity | 0.0326677 | ENSBTAG00000019832 |
| 60 | GO:MF | GO:0005024 | transforming growth factor beta receptor activity | 0.0326677 | ENSBTAG00000019832 |
| 61 | GO:MF | GO:0050431 | transforming growth factor beta binding | 0.0461234 | ENSBTAG00000019832 |
| 62 | GO:MF | GO:0043548 | phosphatidylinositol 3-kinase binding | 0.0461234 | ENSBTAG00000019872 |
| 63 | GO:MF | GO:0016638 | oxidoreductase activity, acting on the CH-NH2 group of donors | 0.0461234 | ENSBTAG00000007540 |
| 64 | GO:MF | GO:0005160 | transforming growth factor beta receptor binding | 0.0461234 | ENSBTAG00000019832 |
| 65 | GO:MF | GO:0004675 | transmembrane receptor protein serine/threonine kinase activity | 0.0461234 | ENSBTAG00000019832 |
| 66 | REAC | REAC:R-BTA-2243919 | Crosslinking of collagen fibrils | 0.0440084 | ENSBTAG00000011743 |
| 67 | REAC | REAC:R-BTA-2173788 | Downregulation of TGF-beta receptor signaling | 0.0440084 | ENSBTAG00000019832 |
| 68 | REAC | REAC:R-BTA-8964539 | Glutamate and glutamine metabolism | 0.0440084 | ENSBTAG00000007540 |
| 69 | REAC | REAC:R-BTA-112308 | Presynaptic depolarization and calcium channel opening | 0.0440084 | ENSBTAG00000013117 |
| 70 | REAC | REAC:R-BTA-2151201 | Transcriptional activation of mitochondrial biogenesis | 0.0440084 | ENSBTAG00000007540 |
| 71 | REAC | REAC:R-BTA-2173791 | TGF-beta receptor signaling in EMT (epithelial to mesenchymal transition) | 0.0440084 | ENSBTAG00000019832 |
From the online version of GPROFILER i got the following results.
Legend
Additionaly I perfomed a another version of this analysis but with no eletronic GO annotation
Now I’m gonna make a pie chart with the categories
library(ggplot2)
# Assuming sig_enrich$Category contains the categories
category_counts <- table(result_enrich$Category)
# Create a pie chart
pie_chart <- ggplot(data = NULL, aes(x = factor(1), fill = names(category_counts), y = as.numeric(category_counts))) +
geom_bar(stat = "identity", width = 1) +
coord_polar(theta = "y") +
labs(title = "Distribution among terms",
x = NULL,
y = NULL,
fill = "Category") +
theme_void() +
theme(legend.position = "right")
# Print the pie chart
print(pie_chart)
First I need to generate the .ped file
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file
plink \
--bfile ${DATA} \
--cow \
--recode \
--out ${RESULT}
Second, is necessary to convert the .ped file to Linkage format:
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/hapblock_in
plink \
--file ${DATA} \
--cow \
--recode HV \
--out ${RESULT}
The code above will generate a pair of files .ped and .info for each chromosome
I ran the haploview only for chromosome 11, 22, 26, 28 that are when the significant SNP are located.
As the LD Plot bring the SNP ordered, first I looked at /home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg.txt, to know the SNP position, and then compare with the Check Markers tab in the haploview SNP position x SNP# in plot
| Chr | SNP | bp | A1 | A2 | Freq | Haploview.order |
|---|---|---|---|---|---|---|
| 11 | BTB-01060598 | 32301594 | C | A | 0.4503970 | 680 |
| 22 | ARS-BFGL-NGS-23411 | 5134078 | A | C | 0.1865080 | 88 |
| 22 | ARS-BFGL-NGS-26419 | 46052082 | A | C | 0.0813492 | 762 |
| 26 | BTB-00926636 | 17857480 | C | A | 0.0257937 | 296 |
| 26 | BTA-62125-no-rs | 17899619 | C | A | 0.0257937 | 297 |
| 28 | BTA-99379-no-rs | 41666186 | A | C | 0.0496032 | 701 |
The two significant SNPs from chromosome 26 fell within a Haploblock.
Chr 11 (SNP 680)
Chr 22 (SNP 88)
Chr 22 (SNP 762)
Chr 26 (SNP 296 and 297)
Chr 28 (SNP 701)
As we found an haploblock on chromosome 26 I decided take a look inside this block. So, on Haploview I checked the name and position of the first SNP of this block (292) and the last (300).
| X. | Name | Position | ObsHET | PredHET | HWpval | X.Geno | FamTrio | MendErr | MAF | Alleles |
|---|---|---|---|---|---|---|---|---|---|---|
| 292 | ARS-BFGL-NGS-111577 | 17714604 | 0.401 | 0.448 | 0.1161 | 100 | 0 | 0 | 0.339 | A:C |
| 293 | ARS-BFGL-NGS-21408 | 17746468 | 0.369 | 0.403 | 0.2260 | 100 | 0 | 0 | 0.280 | C:A |
| 294 | ARS-BFGL-NGS-16328 | 17778908 | 0.056 | 0.054 | 1.0000 | 100 | 0 | 0 | 0.028 | A:C |
| 295 | ARS-BFGL-NGS-117063 | 17817225 | 0.413 | 0.452 | 0.2022 | 100 | 0 | 0 | 0.345 | C:A |
| 296 | BTB-00926636 | 17857480 | 0.052 | 0.050 | 1.0000 | 100 | 0 | 0 | 0.026 | A:C |
| 297 | BTA-62125-no-rs | 17899619 | 0.052 | 0.050 | 1.0000 | 100 | 0 | 0 | 0.026 | A:C |
| 298 | ARS-BFGL-NGS-110679 | 17953795 | 0.397 | 0.433 | 0.2211 | 100 | 0 | 0 | 0.317 | A:C |
| 299 | ARS-BFGL-NGS-72889 | 17989430 | 0.052 | 0.050 | 1.0000 | 100 | 0 | 0 | 0.026 | C:A |
| 300 | BTB-00926868 | 18040673 | 0.385 | 0.420 | 0.2319 | 100 | 0 | 0 | 0.300 | A:C |
Then I checked on NCBI’s Genome Data Viewer which genes are inside this block, adding 50k before the first SNP and 50K after the last SNP.
This interval has 426,069bp and has this genes inside:
As you can see in the image below:
The genes TLL2, TM9SF3 and PIK3AP1 already were in the gene list from GALLO, but the genes DNTT and OPALIN were new!
I performed a GPROFILER analysis only for these genes spotted in this Block 10.
After check the outcome for 5% of significance for SNPs, Prof. Flávio asked me to carry out again considering the interval between 5% and 15% of significance.
Now we are going to apply a correction for multiple testing modifying the Bonferroni test adjusting not for the total number of SNPs but for the number of the independent segments in the genome.
Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")
library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
summarise(total_length = sum(`Seq length`)) %>%
pull(total_length)
# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8
# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)
NeL <- Ne*L_M
# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)
gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_result.loco.mlma",
head=T, stringsAsFactors = F)
gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}
bonf05<- -log10(0.05/Me)
bonf15<- -log10(0.15/Me)
library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20,
cex=1, lwd=1, ylab="", xlab="Chromossomes",
main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf05), col="black")
abline(h=(bonf15), col="red2")
legend("topleft", inset=c(0.05, 0.05), col="red2", lwd=2,
legend=c("15% sig. Bonferroni corr. for ind. segments"), bty="n")
legend("topleft", inset=c(0.05, 0.10), col="black", lwd=2,
legend=c("5% sig. Bonferroni corr. for ind. segments"), bty="n")
library(tidyverse)
#Recovering the significant SNPs > <5%
SNP_sig_bonf_ind_seg_5 <- filter(gwas, logP > bonf05)
write.csv(SNP_sig_bonf_ind_seg_5, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_5.csv")
#Recovering the significant SNPs > 15% and <5%
SNP_sig_bonf_ind_seg_5_15 <- filter(gwas, logP > bonf15 & logP < bonf05)
write.csv(SNP_sig_bonf_ind_seg_5_15, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_5_15.csv")
#Recovering the significant SNPs > 15%
SNP_sig_bonf_ind_seg_15 <- filter(gwas, logP > bonf15)
write.csv(SNP_sig_bonf_ind_seg_15, file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_15.csv")
Below we can find the list of significant SNPs after the correction for Multiple Testing
| Chr | X | X15. | X5. | X5.15. | logP |
|---|---|---|---|---|---|
| 28 | BTA-99379-no-rs | X | X | 6.610970 | |
| 22 | ARS-BFGL-NGS-23411 | X | X | 5.485541 | |
| 22 | ARS-BFGL-NGS-26419 | X | X | 5.272074 | |
| 26 | BTA-62125-no-rs | X | X | 4.732155 | |
| 26 | BTB-00926636 | X | X | 4.732155 | |
| 11 | BTB-01060598 | X | X | 4.682532 | |
| 20 | BTB-01471673 | X | X | 4.182974 | |
| 23 | Hapmap54016-rs29022819 | X | X | 4.016453 | |
| 28 | ARS-BFGL-NGS-11935 | X | X | 3.986177 | |
| 28 | Hapmap43005-BTA-64606 | X | X | 3.986177 | |
| 11 | BTA-122098-no-rs | X | X | 3.980028 | |
| 19 | ARS-BFGL-NGS-29119 | X | X | 3.925461 | |
| 14 | Hapmap39823-BTA-35254 | X | X | 3.905277 | |
| 19 | ARS-BFGL-NGS-28901 | X | X | 3.888442 | |
| 3 | ARS-BFGL-NGS-4585 | X | X | 3.885943 | |
| 14 | UA-IFASA-7664 | X | X | 3.882503 |
To make easier to check the name of the significant SNPs I made the following Manhattan Plot
# Prepare the dataset
don <- gwas %>%
group_by(Chr) %>%
summarise(chr_len = max(bp)) %>%
mutate(tot = cumsum(as.numeric(chr_len)) - as.numeric(chr_len)) %>%
select(-chr_len) %>%
left_join(gwas, ., by = c("Chr" = "Chr")) %>%
arrange(Chr, bp) %>%
mutate(BPcum = as.numeric(bp) + tot) %>%
mutate(is_highlight = ifelse(logP > bonf15, "yes", "no")) %>%
mutate(is_annotate = ifelse(logP > bonf15, "yes", "no"))
# Remove rows with missing BPcum values
don <- don %>% filter(!is.na(BPcum))
# Prepare X axis
axisdf <- don %>%
group_by(Chr) %>%
summarize(center = (max(BPcum) + min(BPcum)) / 2)
# Find the maximum logP value for setting y-axis limits
max_logP <- max(don$logP, na.rm = TRUE)
# Make the plot
ggplot(don, aes(x = BPcum, y = logP)) +
geom_point(aes(color = as.factor(Chr)), alpha = 0.8, size = 1.3) +
scale_color_manual(values = rep(c("grey", "skyblue"), 22)) +
scale_x_continuous(label = axisdf$Chr, breaks = axisdf$center) +
scale_y_continuous(expand = c(0, 0), limits = c(0, max_logP + 1)) + # Set y-axis limit
geom_hline(yintercept = bonf05, color = "black") + # Add horizontal line for bonf05
geom_hline(yintercept = bonf15, color = "red2") + # Add horizontal line for bonf15
geom_point(data = subset(don, is_highlight == "yes"), color = "orange", size = 2) +
geom_text_repel(data = subset(don, is_annotate == "yes"), aes(label = SNP), size = 2) +
annotate("text", x = Inf, y = Inf, label = "15% sig. Bonferroni corr. for ind. segments",
color = "red2", hjust = 1.1, vjust = 2, size = 3) +
annotate("text", x = Inf, y = Inf, label = "5% sig. Bonferroni corr. for ind. segments",
color = "black", hjust = 1.1, vjust = 3.5, size = 3) +
theme_bw() +
theme(
legend.position = "none",
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
) +
labs(title = "GWAS Cortisol", x = "Chromosomes", y = "-log10(p)")
Generating a file with the GWAS output and rsID
library(tidyverse)
# Extracting SNP names to use in the SNPChimp
SNP_NAMES <- SNP_sig_bonf_ind_seg_15 %>% pull(SNP)
SNP_NAMES <- paste(SNP_NAMES, collapse = ", ")
print(SNP_NAMES)
# Copy this printed names and paste on SNPChimp
# Opening the SNPChimp output in R
rsID <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/SNPchimp_result_ind_seg_15.tsv")
rsID <- rsID[,c("rs","SNP_name")]
#Merging in one file the GWAS output and rsID
SNP_rsID_sig_bonf_ind_seg_15 <- merge(SNP_sig_bonf_ind_seg_15, rsID, by.x = "SNP", by.y = "SNP_name")
SNP_rsID_sig_bonf_ind_seg_15 <- SNP_rsID_sig_bonf_ind_seg_15[, c("rs", "SNP", "Chr", "bp", "A1", "A2", "Freq", "b", "se", "p", "logP")]
write.csv(SNP_rsID_sig_bonf_ind_seg_15, "/home/bambrozi/2_CORTISOL/GWAS/SNP_rsID_bonf_ind_seg_15.csv")
Bellow we can see the table with the SNP names and rsID for all significant SNPs with p-value lower than 0,15.
| X | rs | SNP | Chr | bp | A1 | A2 | Freq | b | se | p | logP |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | rs109578343 | ARS-BFGL-NGS-11935 | 28 | 44942879 | A | C | 0.0297619 | 412.261 | 106.1750 | 0.0001032 | 3.986177 |
| 2 | rs110031217 | ARS-BFGL-NGS-23411 | 22 | 5134078 | A | C | 0.1865080 | 207.116 | 44.5110 | 0.0000033 | 5.485541 |
| 3 | rs110991998 | ARS-BFGL-NGS-26419 | 22 | 46052082 | A | C | 0.0813492 | 308.357 | 67.7592 | 0.0000053 | 5.272074 |
| 4 | rs109751680 | ARS-BFGL-NGS-28901 | 19 | 42338148 | C | A | 0.3710320 | 141.156 | 36.8765 | 0.0001293 | 3.888442 |
| 5 | rs41577586 | ARS-BFGL-NGS-29119 | 19 | 49744592 | A | C | 0.4047620 | -142.084 | 36.9169 | 0.0001187 | 3.925461 |
| 6 | rs109888380 | ARS-BFGL-NGS-4585 | 3 | 107160292 | A | C | 0.3353170 | -144.488 | 37.7607 | 0.0001300 | 3.885943 |
| 7 | rs41624254 | BTA-122098-no-rs | 11 | 31882807 | A | C | 0.4444440 | 131.779 | 33.9688 | 0.0001047 | 3.980028 |
| 8 | rs41644634 | BTA-62125-no-rs | 26 | 17899619 | C | A | 0.0257937 | 487.149 | 113.7690 | 0.0000185 | 4.732155 |
| 9 | rs41567074 | BTA-99379-no-rs | 28 | 41666186 | A | C | 0.0496032 | 435.809 | 84.4338 | 0.0000002 | 6.610970 |
| 10 | rs42089058 | BTB-00926636 | 26 | 17857480 | C | A | 0.0257937 | 487.149 | 113.7690 | 0.0000185 | 4.732155 |
| 11 | rs42217767 | BTB-01060598 | 11 | 32301594 | C | A | 0.4503970 | -152.746 | 35.8860 | 0.0000208 | 4.682532 |
| 12 | rs42590312 | BTB-01471673 | 20 | 11545685 | A | C | 0.2460320 | 169.707 | 42.5157 | 0.0000656 | 4.182974 |
| 13 | rs41632193 | Hapmap39823-BTA-35254 | 14 | 64061208 | A | C | 0.2916670 | -138.029 | 35.9698 | 0.0001244 | 3.905277 |
| 14 | rs41648479 | Hapmap43005-BTA-64606 | 28 | 45805105 | A | C | 0.0297619 | 412.261 | 106.1750 | 0.0001032 | 3.986177 |
| 15 | rs29022819 | Hapmap54016-rs29022819 | 23 | 33609998 | C | A | 0.1904760 | 173.228 | 44.4201 | 0.0000963 | 4.016453 |
| 16 | rs41632223 | UA-IFASA-7664 | 14 | 64299714 | C | A | 0.2678570 | -141.443 | 36.9840 | 0.0001311 | 3.882503 |
pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv", header = T)
ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)
# Create data_final$cdn_id by matching IDs with elora_id
data_final$cdn_id <- ids_eq$cdn_id[match(data_final$ID, ids_eq$elora_id)]
fixeff <- data_final[,c("ID", "BIRTH_YEAR", "Sampling_date", "cdn_id")]
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id, ]
fixeff$FID <- "HO"
fixeff <- fixeff[, c("FID", "cdn_id", "BIRTH_YEAR", "Sampling_date")]
identical(fixeff$cdn_id, pheno$cdn_id)
write.table(fixeff, "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/fixeff.txt", quote = F, row.names = F, col.names = T)
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/GWAS_result
PHENO=/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt
FIXEFF=/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/fixeff.txt
/home/local/bin/gcta \
--bfile ${DATA} \
--mlma-loco \
--pheno ${PHENO} \
--qcovar ${FIXEFF} \
--autosome-num 29 \
--autosome \
--out ${RESULT}
Now we are going to apply a correction for multiple testing modifying the Bonferroni test adjusting not for the total number of SNPs but for the number of the independent segments in the genome.
Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")
library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
summarise(total_length = sum(`Seq length`)) %>%
pull(total_length)
# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8
# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)
NeL <- Ne*L_M
# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)
gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/GWAS_result.loco.mlma",
head=T, stringsAsFactors = F)
gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}
bonf<- -log10(0.05/Me)
library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20,
cex=1, lwd=1, ylab="", xlab="Chromossomes",
main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topleft", col="red2", lwd=2, c("Bonferroni corr. for ind. segments"), bty="n")
library(tidyverse)
SNP_sig_bonf_ind_seg <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf_ind_seg, file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/SNP_sig_Bonf_ind_seg.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
write.csv(SNP_sig_bonf_ind_seg, "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/SNP_sig_Bonf_ind_seg2.csv")
The SNPs below are the significant after the correction above
| X | Chr | SNP | bp | A1 | A2 | Freq | b | se | p | logP |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 22 | ARS-BFGL-NGS-23411 | 5134078 | A | C | 0.1872510 | 196.185 | 43.9934 | 8.20e-06 | 5.085250 |
| 2 | 22 | ARS-BFGL-NGS-26419 | 46052082 | A | C | 0.0816733 | 295.245 | 66.9504 | 1.03e-05 | 4.985404 |
| 3 | 26 | BTB-00926636 | 17857480 | C | A | 0.0258964 | 491.154 | 112.4840 | 1.26e-05 | 4.898635 |
| 4 | 26 | BTA-62125-no-rs | 17899619 | C | A | 0.0258964 | 491.154 | 112.4840 | 1.26e-05 | 4.898635 |
| 5 | 28 | BTA-99379-no-rs | 41666186 | A | C | 0.0498008 | 394.376 | 83.4532 | 2.30e-06 | 5.639587 |
Bellow we inserted the rsID searched at SNPCHIMP
| X | Chr | rsID | SNP | bp | A1 | A2 | Freq | b | se | p | logP |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 22 | rs110031217 | ARS-BFGL-NGS-23411 | 5134078 | A | C | 0.1872510 | 196.185 | 43.9934 | 8.20e-06 | 5.085250 |
| 2 | 22 | rs110991998 | ARS-BFGL-NGS-26419 | 46052082 | A | C | 0.0816733 | 295.245 | 66.9504 | 1.03e-05 | 4.985404 |
| 3 | 26 | rs42089058 | BTB-00926636 | 17857480 | C | A | 0.0258964 | 491.154 | 112.4840 | 1.26e-05 | 4.898635 |
| 4 | 26 | rs41644634 | BTA-62125-no-rs | 17899619 | C | A | 0.0258964 | 491.154 | 112.4840 | 1.26e-05 | 4.898635 |
| 5 | 28 | rs41567074 | BTA-99379-no-rs | 41666186 | A | C | 0.0498008 | 394.376 | 83.4532 | 2.30e-06 | 5.639587 |
gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/GWAS_result.loco.mlma",
head=T, stringsAsFactors = F)
# Prepare the dataset
don <- gwas %>%
group_by(Chr) %>%
summarise(chr_len = max(bp)) %>%
mutate(tot = cumsum(as.numeric(chr_len)) - as.numeric(chr_len)) %>%
select(-chr_len) %>%
left_join(gwas, ., by = c("Chr" = "Chr")) %>%
arrange(Chr, bp) %>%
mutate(BPcum = as.numeric(bp) + tot) %>%
mutate(is_highlight = ifelse(logP > bonf, "yes", "no")) %>%
mutate(is_annotate = ifelse(logP > bonf, "yes", "no"))
# Remove rows with missing BPcum values
don <- don %>% filter(!is.na(BPcum))
# Prepare X axis
axisdf <- don %>%
group_by(Chr) %>%
summarize(center = (max(BPcum) + min(BPcum)) / 2)
# Find the maximum logP value for setting y-axis limits
max_logP <- max(don$logP, na.rm = TRUE)
# Make the plot
ggplot(don, aes(x = BPcum, y = logP)) +
geom_point(aes(color = as.factor(Chr)), alpha = 0.8, size = 1.3) +
scale_color_manual(values = rep(c("grey", "skyblue"), 22)) +
scale_x_continuous(label = axisdf$Chr, breaks = axisdf$center) +
scale_y_continuous(expand = c(0, 0), limits = c(0, max_logP + 1)) + # Set y-axis limit
geom_point(data = subset(don, is_highlight == "yes"), color = "orange", size = 2) +
geom_text_repel(data = subset(don, is_annotate == "yes"), aes(label = SNP), size = 2) +
theme_bw() +
theme(
legend.position = "none",
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
) +
labs(title = "GWAS Cortisol", x = "Chromosomes", y = "-log10(p)")
After insert all rsID on VEP the summary is shown below:
As can be seen, the VEP only analysed 14 variants
Bellow I’m gonna show the “genome view” for each SNP (variant) recovered from VEP:
rs41648479
rs41632223
rs41632193
rs42590312
rs42089058
rs41567074
rs41644634
rs41624254
rs109888380
rs41577586
rs109751680
rs110991998
rs110031217
rs109578343
ps. the unique reference genome available in the VEP is the ARS-UCD1.3, which is not a problem once it is working with the rsID, which for sure is our variant.
After insert all rsID on VEP the summary is shown below:
As can be seen, the VEP only analysed 14 variants
Bellow I’m gonna show the “genome view” for each SNP (variant) recovered from VEP:
rs110031217
rs110991998
rs42089058
rs41644634
rs41567074
ps. the unique reference genome available in the VEP is the ARS-UCD1.3, which is not a problem once it is working with the rsID, which for sure is our variant.
# GALLO
#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")
#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")
#import MARKER files = the GWAS output
gwas <- read.csv(file = "/home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_15.csv")
# Assuming "gwas" is your dataframe
gwas <- subset(gwas, select = c(Chr, SNP, bp))
colnames(gwas) <- c("CHR","SNP", "BP")
#FINDING GENES AND QTLs ARROUND THE MARKER
#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2,
marker_file= gwas,
method = "gene",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_genes_50k_15.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_genes_50k_15.csv")
#FINDING QTLs
out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2,
marker_file= gwas,
method = "qtl",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_qtl_50k_15.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_qtl_50k_clean_15.csv")
Dowloading the .gtf file from Ensembl https://useast.ensembl.org/info/data/ftp/index.html
Downloading the .gff file from AnimalQTLdb https://www.animalgenome.org/cgi-bin/QTLdb/index
The GALLO output are bellow:
For GENES
| X | CHR | SNP | BP | chr | start_pos | end_pos | width | strand | gene_id | gene_name | gene_biotype |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 3 | ARS-BFGL-NGS-4585 | 107160292 | 3 | 107109148 | 107110689 | 1542 | + | ENSBTAG00000030338 | GJA9 | protein_coding |
| 2 | 3 | ARS-BFGL-NGS-4585 | 107160292 | 3 | 107111822 | 107117831 | 6010 | + | ENSBTAG00000030337 | MYCBP | protein_coding |
| 3 | 3 | ARS-BFGL-NGS-4585 | 107160292 | 3 | 107121920 | 107136338 | 14419 | + | ENSBTAG00000009368 | RRAGC | protein_coding |
| 4 | 11 | BTB-01060598 | 32301594 | 11 | 32278324 | 32766620 | 488297 | - | ENSBTAG00000046199 | NRXN1 | protein_coding |
| 5 | 11 | BTB-01060598 | 32301594 | 11 | 32280416 | 32280512 | 97 | - | ENSBTAG00000044573 | U6 | snRNA |
| 6 | 14 | Hapmap39823-BTA-35254 | 64061208 | 14 | 64047857 | 64103385 | 55529 | + | ENSBTAG00000017833 | RNF19A | protein_coding |
| 7 | 14 | UA-IFASA-7664 | 64299714 | 14 | 64289853 | 64398324 | 108472 | + | ENSBTAG00000019793 | RGS22 | protein_coding |
| 8 | 19 | ARS-BFGL-NGS-29119 | 49744592 | 19 | 49662140 | 49766195 | 104056 | + | ENSBTAG00000052072 | B3GNTL1 | protein_coding |
| 9 | 19 | ARS-BFGL-NGS-29119 | 49744592 | 19 | 49768681 | 49927209 | 158529 | - | ENSBTAG00000015414 | TBCD | protein_coding |
| 10 | 19 | ARS-BFGL-NGS-28901 | 42338148 | 19 | 42283857 | 42299780 | 15924 | - | ENSBTAG00000001195 | KCNH4 | protein_coding |
| 11 | 19 | ARS-BFGL-NGS-28901 | 42338148 | 19 | 42302320 | 42303571 | 1252 | - | ENSBTAG00000000665 | HCRT | protein_coding |
| 12 | 19 | ARS-BFGL-NGS-28901 | 42338148 | 19 | 42307188 | 42312293 | 5106 | - | ENSBTAG00000000666 | GHDC | protein_coding |
| 13 | 19 | ARS-BFGL-NGS-28901 | 42338148 | 19 | 42319170 | 42357910 | 38741 | - | ENSBTAG00000010125 | STAT5B | protein_coding |
| 14 | 19 | ARS-BFGL-NGS-28901 | 42338148 | 19 | 42387164 | 42390885 | 3722 | + | ENSBTAG00000052763 | NA | lncRNA |
| 15 | 22 | ARS-BFGL-NGS-26419 | 46052082 | 22 | 45924535 | 46818511 | 893977 | - | ENSBTAG00000013117 | CACNA2D3 | protein_coding |
| 16 | 22 | ARS-BFGL-NGS-26419 | 46052082 | 22 | 46051998 | 46062657 | 10660 | + | ENSBTAG00000013124 | LRTM1 | protein_coding |
| 17 | 22 | ARS-BFGL-NGS-23411 | 5134078 | 22 | 5091037 | 5184321 | 93285 | + | ENSBTAG00000019832 | TGFBR2 | protein_coding |
| 18 | 23 | Hapmap54016-rs29022819 | 33609998 | 23 | 33618285 | 33618377 | 93 | + | ENSBTAG00000045279 | SNORA70 | snoRNA |
| 19 | 26 | BTB-00926636 | 17857480 | 26 | 17703169 | 17846407 | 143239 | - | ENSBTAG00000011743 | TLL2 | protein_coding |
| 20 | 26 | BTB-00926636 | 17857480 | 26 | 17851539 | 17912665 | 61127 | - | ENSBTAG00000016298 | TM9SF3 | protein_coding |
| 21 | 26 | BTA-62125-no-rs | 17899619 | 26 | 17851539 | 17912665 | 61127 | - | ENSBTAG00000016298 | TM9SF3 | protein_coding |
| 22 | 26 | BTA-62125-no-rs | 17899619 | 26 | 17925643 | 18029878 | 104236 | - | ENSBTAG00000019872 | PIK3AP1 | protein_coding |
| 23 | 28 | BTA-99379-no-rs | 41666186 | 28 | 41609015 | 41649016 | 40002 | - | ENSBTAG00000007540 | GLUD1 | protein_coding |
| 24 | 28 | BTA-99379-no-rs | 41666186 | 28 | 41650333 | 41763371 | 113039 | + | ENSBTAG00000019194 | SHLD2 | protein_coding |
| 25 | 28 | Hapmap43005-BTA-64606 | 45805105 | 28 | 45757443 | 45768564 | 11122 | + | ENSBTAG00000012393 | AGT | protein_coding |
| 26 | 28 | Hapmap43005-BTA-64606 | 45805105 | 28 | 45772443 | 45814853 | 42411 | - | ENSBTAG00000020314 | COG2 | protein_coding |
FOR QTLs
| X | CHR | SNP | BP | QTL_type | start_pos | end_pos | QTL_ID |
|---|---|---|---|---|---|---|---|
| 1 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Reproduction | 107191525 | 107191529 | 40198 |
| 2 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Exterior | 107191525 | 107191529 | 40199 |
| 3 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Reproduction | 107191525 | 107191529 | 40200 |
| 4 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Exterior | 107191525 | 107191529 | 40201 |
| 5 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Milk | 107191525 | 107191529 | 40202 |
| 6 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Milk | 107191525 | 107191529 | 40203 |
| 7 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Production | 107191525 | 107191529 | 40204 |
| 8 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Production | 107191525 | 107191529 | 40205 |
| 9 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Milk | 107191525 | 107191529 | 40206 |
| 10 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Milk | 107191525 | 107191529 | 40207 |
| 11 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Exterior | 107191525 | 107191529 | 40208 |
| 12 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Reproduction | 107191525 | 107191529 | 40209 |
| 13 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Health | 107191525 | 107191529 | 40210 |
| 14 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Reproduction | 107191525 | 107191529 | 40211 |
| 15 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Exterior | 107191525 | 107191529 | 40212 |
| 16 | 3 | ARS-BFGL-NGS-4585 | 107160292 | Exterior | 107191525 | 107191529 | 40213 |
| 17 | 11 | BTA-122098-no-rs | 31882807 | Milk | 31870044 | 31870048 | 122275 |
| 18 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64013785 | 64013789 | 247433 |
| 19 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64013858 | 64013862 | 241580 |
| 20 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64013858 | 64013862 | 252031 |
| 21 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64013897 | 64013901 | 241079 |
| 22 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64013897 | 64013901 | 251434 |
| 23 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64013952 | 64013956 | 241853 |
| 24 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64013952 | 64013956 | 252403 |
| 25 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64015726 | 64015730 | 105628 |
| 26 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64017424 | 64017428 | 252151 |
| 27 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64017970 | 64017974 | 18873 |
| 28 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64061206 | 64061210 | 18872 |
| 29 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64061206 | 64061210 | 105335 |
| 30 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64061206 | 64061210 | 173792 |
| 31 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64061206 | 64061210 | 175101 |
| 32 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64061206 | 64061210 | 176088 |
| 33 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64061206 | 64061210 | 176487 |
| 34 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64061206 | 64061210 | 252723 |
| 35 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64063422 | 64063426 | 175102 |
| 36 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64068329 | 64068333 | 105336 |
| 37 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64081490 | 64081494 | 104637 |
| 38 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64081490 | 64081494 | 104890 |
| 39 | 14 | Hapmap39823-BTA-35254 | 64061208 | Health | 64081490 | 64081494 | 179830 |
| 40 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64087234 | 64087238 | 175103 |
| 41 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64087234 | 64087238 | 252869 |
| 42 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64087450 | 64087454 | 252628 |
| 43 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64088569 | 64088573 | 252926 |
| 44 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64103583 | 64103587 | 104638 |
| 45 | 14 | Hapmap39823-BTA-35254 | 64061208 | Milk | 64103583 | 64103587 | 104891 |
| 46 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64272869 | 64272873 | 102476 |
| 47 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64272869 | 64272873 | 104484 |
| 48 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64272869 | 64272873 | 104863 |
| 49 | 14 | UA-IFASA-7664 | 64299714 | Health | 64272869 | 64272873 | 179764 |
| 50 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64283263 | 64283267 | 175108 |
| 51 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64283263 | 64283267 | 251800 |
| 52 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64283947 | 64283951 | 251398 |
| 53 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64288968 | 64288972 | 247633 |
| 54 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64288968 | 64288972 | 252142 |
| 55 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 18875 |
| 56 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 105660 |
| 57 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 158137 |
| 58 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 173793 |
| 59 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 175109 |
| 60 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 176085 |
| 61 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 176486 |
| 62 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 250009 |
| 63 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64299712 | 64299716 | 252724 |
| 64 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64302757 | 64302761 | 251934 |
| 65 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64308608 | 64308612 | 252948 |
| 66 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64317001 | 64317005 | 252652 |
| 67 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64322496 | 64322500 | 251752 |
| 68 | 14 | UA-IFASA-7664 | 64299714 | Meat_and_Carcass | 64327478 | 64327482 | 20269 |
| 69 | 14 | UA-IFASA-7664 | 64299714 | Meat_and_Carcass | 64327478 | 64327482 | 20311 |
| 70 | 14 | UA-IFASA-7664 | 64299714 | Meat_and_Carcass | 64327478 | 64327482 | 20393 |
| 71 | 14 | UA-IFASA-7664 | 64299714 | Meat_and_Carcass | 64327478 | 64327482 | 20464 |
| 72 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64327904 | 64327908 | 32596 |
| 73 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64327904 | 64327908 | 32634 |
| 74 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64328322 | 64328326 | 252206 |
| 75 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64339118 | 64339122 | 251497 |
| 76 | 14 | UA-IFASA-7664 | 64299714 | Milk | 64343288 | 64343292 | 252287 |
| 77 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42292941 | 42292945 | 147657 |
| 78 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42292941 | 42292945 | 147675 |
| 79 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42322679 | 42322683 | 173073 |
| 80 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42322679 | 42322683 | 173074 |
| 81 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42322679 | 42322683 | 173075 |
| 82 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42322679 | 42322683 | 173076 |
| 83 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42322679 | 42322683 | 173077 |
| 84 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42322679 | 42322683 | 173237 |
| 85 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42338146 | 42338150 | 166545 |
| 86 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42338146 | 42338150 | 166546 |
| 87 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42338146 | 42338150 | 253605 |
| 88 | 19 | ARS-BFGL-NGS-28901 | 42338148 | Milk | 42349650 | 42349654 | 147630 |
| 89 | 19 | ARS-BFGL-NGS-29119 | 49744592 | Milk | 49702578 | 49702582 | 195323 |
| 90 | 19 | ARS-BFGL-NGS-29119 | 49744592 | Milk | 49702578 | 49702582 | 195324 |
| 91 | 19 | ARS-BFGL-NGS-29119 | 49744592 | Milk | 49702578 | 49702582 | 195325 |
| 92 | 19 | ARS-BFGL-NGS-29119 | 49744592 | Milk | 49702578 | 49702582 | 205408 |
| 93 | 19 | ARS-BFGL-NGS-29119 | 49744592 | Meat_and_Carcass | 49715846 | 49715850 | 107754 |
| 94 | 19 | ARS-BFGL-NGS-29119 | 49744592 | Milk | 49735877 | 49735881 | 196362 |
| 95 | 19 | ARS-BFGL-NGS-29119 | 49744592 | Milk | 49772086 | 49772090 | 195342 |
| 96 | 20 | BTB-01471673 | 11545685 | Production | 11498161 | 11498165 | 65554 |
| 97 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5109498 | 5109502 | 151609 |
| 98 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5134076 | 5134080 | 151620 |
| 99 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5173197 | 5173201 | 151601 |
| 100 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51802 |
| 101 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51803 |
| 102 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51804 |
| 103 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51805 |
| 104 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Production | 46052080 | 46052084 | 51806 |
| 105 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Production | 46052080 | 46052084 | 51807 |
| 106 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51808 |
| 107 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51809 |
| 108 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Health | 46052080 | 46052084 | 51810 |
| 109 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51811 |
| 110 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51812 |
| 111 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51813 |
| 112 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Milk | 46073517 | 46073521 | 243365 |
| 113 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 195264 |
| 114 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 200269 |
| 115 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 200270 |
| 116 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 205295 |
| 117 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 198859 |
| 118 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 203868 |
| 119 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 203869 |
| 120 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 210297 |
| 121 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 198858 |
| 122 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 203866 |
| 123 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 203867 |
| 124 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 210296 |
| 125 | 26 | BTB-00926636 | 17857480 | Milk | 17811725 | 17811729 | 197493 |
| 126 | 26 | BTB-00926636 | 17857480 | Milk | 17811725 | 17811729 | 202704 |
| 127 | 26 | BTB-00926636 | 17857480 | Milk | 17812273 | 17812277 | 195685 |
| 128 | 26 | BTB-00926636 | 17857480 | Milk | 17812273 | 17812277 | 200785 |
| 129 | 26 | BTB-00926636 | 17857480 | Milk | 17813212 | 17813216 | 195285 |
| 130 | 26 | BTB-00926636 | 17857480 | Milk | 17813212 | 17813216 | 200301 |
| 131 | 26 | BTB-00926636 | 17857480 | Milk | 17813978 | 17813982 | 196236 |
| 132 | 26 | BTB-00926636 | 17857480 | Milk | 17813978 | 17813982 | 201468 |
| 133 | 26 | BTB-00926636 | 17857480 | Milk | 17815237 | 17815241 | 198002 |
| 134 | 26 | BTB-00926636 | 17857480 | Milk | 17815237 | 17815241 | 203190 |
| 135 | 26 | BTB-00926636 | 17857480 | Milk | 17817223 | 17817227 | 34750 |
| 136 | 26 | BTB-00926636 | 17857480 | Milk | 17817223 | 17817227 | 195586 |
| 137 | 26 | BTB-00926636 | 17857480 | Milk | 17821297 | 17821301 | 197235 |
| 138 | 26 | BTB-00926636 | 17857480 | Milk | 17821297 | 17821301 | 202455 |
| 139 | 26 | BTB-00926636 | 17857480 | Milk | 17822209 | 17822213 | 197783 |
| 140 | 26 | BTB-00926636 | 17857480 | Milk | 17822209 | 17822213 | 202983 |
| 141 | 26 | BTB-00926636 | 17857480 | Milk | 17823105 | 17823109 | 196204 |
| 142 | 26 | BTB-00926636 | 17857480 | Milk | 17823105 | 17823109 | 201419 |
| 143 | 26 | BTB-00926636 | 17857480 | Milk | 17824770 | 17824774 | 196867 |
| 144 | 26 | BTB-00926636 | 17857480 | Milk | 17824770 | 17824774 | 202101 |
| 145 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 195748 |
| 146 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 200855 |
| 147 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 206213 |
| 148 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 197144 |
| 149 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 197144 |
| 150 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 202367 |
| 151 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 202367 |
| 152 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 208485 |
| 153 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 208485 |
| 154 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 197611 |
| 155 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 197611 |
| 156 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 202816 |
| 157 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 202816 |
| 158 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 209076 |
| 159 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 209076 |
| 160 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 63656 |
| 161 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 63656 |
| 162 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 199068 |
| 163 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 199068 |
| 164 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 204113 |
| 165 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 204113 |
| 166 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 204114 |
| 167 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 204114 |
| 168 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 210399 |
| 169 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 210399 |
| 170 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 63657 |
| 171 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 63657 |
| 172 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 199069 |
| 173 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 199069 |
| 174 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 204115 |
| 175 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 204115 |
| 176 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 204116 |
| 177 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 204116 |
| 178 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 210400 |
| 179 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 210400 |
| 180 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 199070 |
| 181 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 199070 |
| 182 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 204117 |
| 183 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 204117 |
| 184 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 204118 |
| 185 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 204118 |
| 186 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 210401 |
| 187 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 210401 |
| 188 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 199071 |
| 189 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 199071 |
| 190 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 204119 |
| 191 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 204119 |
| 192 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 204120 |
| 193 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 204120 |
| 194 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 210402 |
| 195 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 210402 |
| 196 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 63658 |
| 197 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 63658 |
| 198 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 199072 |
| 199 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 199072 |
| 200 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 204121 |
| 201 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 204121 |
| 202 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 204122 |
| 203 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 204122 |
| 204 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 210403 |
| 205 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 210403 |
| 206 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 63660 |
| 207 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 63660 |
| 208 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 199074 |
| 209 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 199074 |
| 210 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 204125 |
| 211 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 204125 |
| 212 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 204126 |
| 213 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 204126 |
| 214 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 210405 |
| 215 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 210405 |
| 216 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 63654 |
| 217 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 199076 |
| 218 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 204129 |
| 219 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 204130 |
| 220 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 210407 |
| 221 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 63655 |
| 222 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 197826 |
| 223 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 203026 |
| 224 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 203027 |
| 225 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 209354 |
| 226 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 195860 |
| 227 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 201008 |
| 228 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 201009 |
| 229 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 206462 |
| 230 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 33984 |
| 231 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195668 |
| 232 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195669 |
| 233 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195670 |
| 234 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 200758 |
| 235 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17931209 | 17931213 | 199092 |
| 236 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17931209 | 17931213 | 204151 |
| 237 | 28 | BTA-99379-no-rs | 41666186 | Reproduction | 41646434 | 41646438 | 107048 |
| 238 | 28 | BTA-99379-no-rs | 41666186 | Reproduction | 41646434 | 41646438 | 107227 |
| 239 | 28 | ARS-BFGL-NGS-11935 | 44942879 | Reproduction | 44971508 | 44971512 | 238444 |
| 240 | 28 | ARS-BFGL-NGS-11935 | 44942879 | Production | 44981682 | 44981686 | 66703 |
| 241 | 28 | ARS-BFGL-NGS-11935 | 44942879 | Production | 44981682 | 44981686 | 66704 |
| 242 | 28 | ARS-BFGL-NGS-11935 | 44942879 | Production | 44981682 | 44981686 | 69429 |
| 243 | 28 | Hapmap43005-BTA-64606 | 45805105 | Milk | 45811156 | 45811160 | 34467 |
QTL type
#GALLO
par(mar=c(8,20,8,8))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1)
QTL type
#GALLO
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Reproduction")
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Health")
Reproduction
Health
#QTL enrichment analysis
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "Name",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]
write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_enrich_qtl_genome_name_15.csv")
out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "QTL_type",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/Bonf_5_15/out_enrich_qtl_genome_type_15.csv")
#Plots
#Name
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")
#Type
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")
QTL Enrichment outcomes
Enrichment by name (enrichment analysis will be performed for each trait individually)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval | QTL_type |
|---|---|---|---|---|---|---|---|---|
| 15 | Milk C14 index | 35 | 4437 | 209 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 28 | Milk myristoleic acid content | 26 | 3047 | 209 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 31 | Milk protein percentage | 39 | 8803 | 209 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 29 | Milk palmitoleic acid content | 15 | 2422 | 209 | 163224 | 0.0000007 | 0.0000079 | Milk |
| 16 | Milk C16 index | 12 | 2002 | 209 | 163224 | 0.0000129 | 0.0001159 | Milk |
| 36 | Muscle carnosine content | 3 | 67 | 209 | 163224 | 0.0000933 | 0.0006998 | Meat and Carcass |
| 23 | Milk lactose content | 3 | 105 | 209 | 163224 | 0.0003523 | 0.0022648 | Milk |
| 20 | Milk fat content | 2 | 92 | 209 | 163224 | 0.0063319 | 0.0356168 | Milk |
| 27 | Milk myristic acid content | 4 | 902 | 209 | 163224 | 0.0294859 | 0.1449683 | Milk |
| 33 | Milk stearic acid content | 2 | 218 | 209 | 163224 | 0.0322152 | 0.1449683 | Milk |
| 30 | Milk pentadecylic acid content | 2 | 280 | 209 | 163224 | 0.0505491 | 0.2067917 | Milk |
| 44 | Teat length | 2 | 300 | 209 | 163224 | 0.0570974 | 0.2141153 | Exterior |
| 14 | Milk arachidic acid content | 1 | 60 | 209 | 163224 | 0.0740084 | 0.2220251 | Milk |
| 25 | Milk margaric acid content | 1 | 59 | 209 | 163224 | 0.0728207 | 0.2220251 | Milk |
| 26 | Milk mid-infrared spectra | 1 | 55 | 209 | 163224 | 0.0680550 | 0.2220251 | Milk |
| 8 | Ketosis | 2 | 441 | 209 | 163224 | 0.1101333 | 0.2915293 | Health |
| 40 | Rear leg placement - side view | 2 | 430 | 209 | 163224 | 0.1056354 | 0.2915293 | Exterior |
| 39 | Pregnancy rate | 3 | 944 | 209 | 163224 | 0.1218012 | 0.3045029 | Reproduction |
| 37 | Myristic acid content | 1 | 120 | 209 | 163224 | 0.1425637 | 0.3376510 | Meat and Carcass |
| 6 | Fat thickness at the 12th rib | 1 | 169 | 209 | 163224 | 0.1947851 | 0.4382665 | Meat and Carcass |
| 7 | Foot angle | 2 | 672 | 209 | 163224 | 0.2129475 | 0.4563160 | Exterior |
| 13 | Maturity rate | 1 | 243 | 209 | 163224 | 0.2677107 | 0.5072331 | Production |
| 17 | Milk C18 index | 1 | 246 | 209 | 163224 | 0.2705243 | 0.5072331 | Milk |
| 42 | Stillbirth | 3 | 1363 | 209 | 163224 | 0.2544359 | 0.5072331 | Reproduction |
| 18 | Milk capric acid content | 2 | 912 | 209 | 163224 | 0.3259016 | 0.5640605 | Milk |
| 38 | Net merit | 2 | 903 | 209 | 163224 | 0.3216964 | 0.5640605 | Production |
| 24 | Milk lauric acid content | 1 | 366 | 209 | 163224 | 0.3746664 | 0.6021425 | Milk |
| 32 | Milk protein yield | 5 | 3093 | 209 | 163224 | 0.3633449 | 0.6021425 | Milk |
| 41 | Somatic cell score | 2 | 1122 | 209 | 163224 | 0.4213659 | 0.6538437 | Health |
| 5 | Dairy form | 1 | 514 | 209 | 163224 | 0.4829472 | 0.7244208 | Exterior |
| 3 | Calving ease | 5 | 3819 | 209 | 163224 | 0.5419946 | 0.7725995 | Reproduction |
| 9 | Lean meat yield | 1 | 621 | 209 | 163224 | 0.5494041 | 0.7725995 | Meat and Carcass |
| 43 | Strength | 1 | 664 | 209 | 163224 | 0.5736508 | 0.7813075 | Exterior |
| 45 | Udder depth | 1 | 695 | 209 | 163224 | 0.5903212 | 0.7813075 | Exterior |
| 19 | Milk caprylic acid content | 1 | 811 | 209 | 163224 | 0.6471450 | 0.8320436 | Milk |
| 10 | Length of productive life | 2 | 2004 | 209 | 163224 | 0.7280866 | 0.8855108 | Production |
| 35 | Milking speed | 1 | 1011 | 209 | 163224 | 0.7273020 | 0.8855108 | Milk |
| 2 | Body weight gain | 1 | 1354 | 209 | 163224 | 0.8248432 | 0.9440581 | Production |
| 4 | Conception rate | 1 | 1255 | 209 | 163224 | 0.8009513 | 0.9440581 | Reproduction |
| 11 | Longissimus muscle area | 1 | 1420 | 209 | 163224 | 0.8391628 | 0.9440581 | Meat and Carcass |
| 12 | Marbling score | 1 | 1817 | 209 | 163224 | 0.9037804 | 0.9830306 | Meat and Carcass |
| 34 | Milk yield | 5 | 6432 | 209 | 163224 | 0.9174952 | 0.9830306 | Milk |
| 1 | Body weight | 2 | 4289 | 209 | 163224 | 0.9746418 | 0.9967928 | Production |
| 21 | Milk fat percentage | 8 | 10941 | 209 | 163224 | 0.9726277 | 0.9967928 | Milk |
| 22 | Milk fat yield | 1 | 8220 | 209 | 163224 | 0.9999797 | 0.9999797 | Milk |
Enrichment by QTL_type (enrichment processes performed for the QTL classes)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval |
|---|---|---|---|---|---|---|---|
| 1 | Exterior | 9 | 9077 | 209 | 163224 | 0.8264587 | 1 |
| 2 | Health | 4 | 5889 | 209 | 163224 | 0.9456284 | 1 |
| 3 | Meat and Carcass | 8 | 18258 | 209 | 163224 | 0.9999640 | 1 |
| 4 | Milk | 168 | 75352 | 209 | 163224 | 0.0000000 | 0 |
| 5 | Production | 8 | 19640 | 209 | 163224 | 0.9999916 | 1 |
| 6 | Reproduction | 12 | 35008 | 209 | 163224 | 1.0000000 | 1 |
# GALLO
#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")
#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")
#import MARKER files = the GWAS output
gwas <- read.csv(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/SNP_sig_Bonf_ind_seg.csv")
# Assuming "gwas" is your dataframe
gwas <- subset(gwas, select = c(Chr, SNP, bp))
colnames(gwas) <- c("CHR","SNP", "BP")
#FINDING GENES AND QTLs ARROUND THE MARKER
#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2,
marker_file= gwas,
method = "gene",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_genes2.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_genes2.csv")
#FINDING QTLs
out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2,
marker_file= gwas,
method = "qtl",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_qtl2.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_qtl_clean2.csv")
Dowloading the .gtf file from Ensembl https://useast.ensembl.org/info/data/ftp/index.html
Downloading the .gff file from AnimalQTLdb https://www.animalgenome.org/cgi-bin/QTLdb/index
The GALLO output are bellow:
For GENES
| X | CHR | SNP | BP | chr | start_pos | end_pos | width | strand | gene_id | gene_name | gene_biotype |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 22 | ARS-BFGL-NGS-26419 | 46052082 | 22 | 45924535 | 46818511 | 893977 | - | ENSBTAG00000013117 | CACNA2D3 | protein_coding |
| 2 | 22 | ARS-BFGL-NGS-26419 | 46052082 | 22 | 46051998 | 46062657 | 10660 | + | ENSBTAG00000013124 | LRTM1 | protein_coding |
| 3 | 22 | ARS-BFGL-NGS-23411 | 5134078 | 22 | 5091037 | 5184321 | 93285 | + | ENSBTAG00000019832 | TGFBR2 | protein_coding |
| 4 | 26 | BTB-00926636 | 17857480 | 26 | 17703169 | 17846407 | 143239 | - | ENSBTAG00000011743 | TLL2 | protein_coding |
| 5 | 26 | BTB-00926636 | 17857480 | 26 | 17851539 | 17912665 | 61127 | - | ENSBTAG00000016298 | TM9SF3 | protein_coding |
| 6 | 26 | BTA-62125-no-rs | 17899619 | 26 | 17851539 | 17912665 | 61127 | - | ENSBTAG00000016298 | TM9SF3 | protein_coding |
| 7 | 26 | BTA-62125-no-rs | 17899619 | 26 | 17925643 | 18029878 | 104236 | - | ENSBTAG00000019872 | PIK3AP1 | protein_coding |
| 8 | 28 | BTA-99379-no-rs | 41666186 | 28 | 41609015 | 41649016 | 40002 | - | ENSBTAG00000007540 | GLUD1 | protein_coding |
| 9 | 28 | BTA-99379-no-rs | 41666186 | 28 | 41650333 | 41763371 | 113039 | + | ENSBTAG00000019194 | SHLD2 | protein_coding |
FOR QTLs
| X | CHR | SNP | BP | QTL_type | start_pos | end_pos | QTL_ID |
|---|---|---|---|---|---|---|---|
| 1 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5109498 | 5109502 | 151609 |
| 2 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5134076 | 5134080 | 151620 |
| 3 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5173197 | 5173201 | 151601 |
| 4 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51802 |
| 5 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51803 |
| 6 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51804 |
| 7 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51805 |
| 8 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Production | 46052080 | 46052084 | 51806 |
| 9 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Production | 46052080 | 46052084 | 51807 |
| 10 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51808 |
| 11 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51809 |
| 12 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Health | 46052080 | 46052084 | 51810 |
| 13 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51811 |
| 14 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51812 |
| 15 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51813 |
| 16 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Milk | 46073517 | 46073521 | 243365 |
| 17 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 195264 |
| 18 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 200269 |
| 19 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 200270 |
| 20 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 205295 |
| 21 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 198859 |
| 22 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 203868 |
| 23 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 203869 |
| 24 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 210297 |
| 25 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 198858 |
| 26 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 203866 |
| 27 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 203867 |
| 28 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 210296 |
| 29 | 26 | BTB-00926636 | 17857480 | Milk | 17811725 | 17811729 | 197493 |
| 30 | 26 | BTB-00926636 | 17857480 | Milk | 17811725 | 17811729 | 202704 |
| 31 | 26 | BTB-00926636 | 17857480 | Milk | 17812273 | 17812277 | 195685 |
| 32 | 26 | BTB-00926636 | 17857480 | Milk | 17812273 | 17812277 | 200785 |
| 33 | 26 | BTB-00926636 | 17857480 | Milk | 17813212 | 17813216 | 195285 |
| 34 | 26 | BTB-00926636 | 17857480 | Milk | 17813212 | 17813216 | 200301 |
| 35 | 26 | BTB-00926636 | 17857480 | Milk | 17813978 | 17813982 | 196236 |
| 36 | 26 | BTB-00926636 | 17857480 | Milk | 17813978 | 17813982 | 201468 |
| 37 | 26 | BTB-00926636 | 17857480 | Milk | 17815237 | 17815241 | 198002 |
| 38 | 26 | BTB-00926636 | 17857480 | Milk | 17815237 | 17815241 | 203190 |
| 39 | 26 | BTB-00926636 | 17857480 | Milk | 17817223 | 17817227 | 34750 |
| 40 | 26 | BTB-00926636 | 17857480 | Milk | 17817223 | 17817227 | 195586 |
| 41 | 26 | BTB-00926636 | 17857480 | Milk | 17821297 | 17821301 | 197235 |
| 42 | 26 | BTB-00926636 | 17857480 | Milk | 17821297 | 17821301 | 202455 |
| 43 | 26 | BTB-00926636 | 17857480 | Milk | 17822209 | 17822213 | 197783 |
| 44 | 26 | BTB-00926636 | 17857480 | Milk | 17822209 | 17822213 | 202983 |
| 45 | 26 | BTB-00926636 | 17857480 | Milk | 17823105 | 17823109 | 196204 |
| 46 | 26 | BTB-00926636 | 17857480 | Milk | 17823105 | 17823109 | 201419 |
| 47 | 26 | BTB-00926636 | 17857480 | Milk | 17824770 | 17824774 | 196867 |
| 48 | 26 | BTB-00926636 | 17857480 | Milk | 17824770 | 17824774 | 202101 |
| 49 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 195748 |
| 50 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 200855 |
| 51 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 206213 |
| 52 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 197144 |
| 53 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 197144 |
| 54 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 202367 |
| 55 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 202367 |
| 56 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 208485 |
| 57 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 208485 |
| 58 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 197611 |
| 59 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 197611 |
| 60 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 202816 |
| 61 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 202816 |
| 62 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 209076 |
| 63 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 209076 |
| 64 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 63656 |
| 65 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 63656 |
| 66 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 199068 |
| 67 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 199068 |
| 68 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 204113 |
| 69 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 204113 |
| 70 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 204114 |
| 71 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 204114 |
| 72 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 210399 |
| 73 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 210399 |
| 74 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 63657 |
| 75 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 63657 |
| 76 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 199069 |
| 77 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 199069 |
| 78 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 204115 |
| 79 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 204115 |
| 80 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 204116 |
| 81 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 204116 |
| 82 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 210400 |
| 83 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 210400 |
| 84 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 199070 |
| 85 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 199070 |
| 86 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 204117 |
| 87 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 204117 |
| 88 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 204118 |
| 89 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 204118 |
| 90 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 210401 |
| 91 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 210401 |
| 92 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 199071 |
| 93 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 199071 |
| 94 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 204119 |
| 95 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 204119 |
| 96 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 204120 |
| 97 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 204120 |
| 98 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 210402 |
| 99 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 210402 |
| 100 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 63658 |
| 101 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 63658 |
| 102 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 199072 |
| 103 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 199072 |
| 104 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 204121 |
| 105 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 204121 |
| 106 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 204122 |
| 107 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 204122 |
| 108 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 210403 |
| 109 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 210403 |
| 110 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 63660 |
| 111 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 63660 |
| 112 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 199074 |
| 113 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 199074 |
| 114 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 204125 |
| 115 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 204125 |
| 116 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 204126 |
| 117 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 204126 |
| 118 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 210405 |
| 119 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 210405 |
| 120 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 63654 |
| 121 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 199076 |
| 122 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 204129 |
| 123 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 204130 |
| 124 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 210407 |
| 125 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 63655 |
| 126 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 197826 |
| 127 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 203026 |
| 128 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 203027 |
| 129 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 209354 |
| 130 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 195860 |
| 131 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 201008 |
| 132 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 201009 |
| 133 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 206462 |
| 134 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 33984 |
| 135 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195668 |
| 136 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195669 |
| 137 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195670 |
| 138 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 200758 |
| 139 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17931209 | 17931213 | 199092 |
| 140 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17931209 | 17931213 | 204151 |
| 141 | 28 | BTA-99379-no-rs | 41666186 | Reproduction | 41646434 | 41646438 | 107048 |
| 142 | 28 | BTA-99379-no-rs | 41666186 | Reproduction | 41646434 | 41646438 | 107227 |
QTL type
#GALLO
par(mar=c(8,20,8,8))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1)
QTL type
#GALLO
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Reproduction")
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Health")
Reproduction
Health
#QTL enrichment analysis
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "Name",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]
write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_enrich_qtl_genome_name_2.csv")
out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "QTL_type",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_enrich_qtl_genome_type_2.csv")
#Plots
#Name
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")
#Type
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")
QTL Enrichment outcomes
Enrichment by name (enrichment analysis will be performed for each trait individually)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval | QTL_type |
|---|---|---|---|---|---|---|---|---|
| 5 | Milk C14 index | 35 | 4437 | 108 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 9 | Milk myristoleic acid content | 26 | 3047 | 108 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 10 | Milk palmitoleic acid content | 15 | 2422 | 108 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 6 | Milk C16 index | 12 | 2002 | 108 | 163224 | 0.0000000 | 0.0000001 | Milk |
| 12 | Muscle carnosine content | 3 | 67 | 108 | 163224 | 0.0000131 | 0.0000497 | Meat and Carcass |
| 14 | Pregnancy rate | 2 | 944 | 108 | 163224 | 0.1296627 | 0.4105985 | Reproduction |
| 18 | Teat length | 1 | 300 | 108 | 163224 | 0.1802436 | 0.4892327 | Exterior |
| 15 | Rear leg placement - side view | 1 | 430 | 108 | 163224 | 0.2479752 | 0.5235032 | Exterior |
| 17 | Stillbirth | 2 | 1363 | 108 | 163224 | 0.2280294 | 0.5235032 | Reproduction |
| 3 | Foot angle | 1 | 672 | 108 | 163224 | 0.3596272 | 0.6162877 | Exterior |
| 7 | Milk capric acid content | 1 | 912 | 108 | 163224 | 0.4541067 | 0.6162877 | Milk |
| 8 | Milk myristic acid content | 1 | 902 | 108 | 163224 | 0.4504612 | 0.6162877 | Milk |
| 13 | Net merit | 1 | 903 | 108 | 163224 | 0.4508269 | 0.6162877 | Production |
| 19 | Udder depth | 1 | 695 | 108 | 163224 | 0.3693423 | 0.6162877 | Exterior |
| 16 | Somatic cell score | 1 | 1122 | 108 | 163224 | 0.5253603 | 0.6654564 | Health |
| 2 | Conception rate | 1 | 1255 | 108 | 163224 | 0.5656375 | 0.6716945 | Reproduction |
| 1 | Calving ease | 2 | 3819 | 108 | 163224 | 0.7219243 | 0.7776744 | Reproduction |
| 4 | Length of productive life | 1 | 2004 | 108 | 163224 | 0.7367441 | 0.7776744 | Production |
| 11 | Milk yield | 1 | 6432 | 108 | 163224 | 0.9870080 | 0.9870080 | Milk |
Enrichment by QTL_type (enrichment processes performed for the QTL classes)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval |
|---|---|---|---|---|---|---|---|
| 1 | Exterior | 4 | 9077 | 108 | 163224 | 0.8569775 | 0.9999953 |
| 2 | Health | 1 | 5889 | 108 | 163224 | 0.9811250 | 0.9999953 |
| 3 | Meat and Carcass | 3 | 18258 | 108 | 163224 | 0.9997109 | 0.9999953 |
| 4 | Milk | 91 | 75352 | 108 | 163224 | 0.0000000 | 0.0000000 |
| 5 | Production | 2 | 19640 | 108 | 163224 | 0.9999848 | 0.9999953 |
| 6 | Reproduction | 7 | 35008 | 108 | 163224 | 0.9999953 | 0.9999953 |
From the online version of GPROFILER i got the following results.
Legend
First I need to generate the .ped file
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file
plink \
--bfile ${DATA} \
--cow \
--recode \
--out ${RESULT}
Second, is necessary to convert the .ped file to Linkage format:
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/inter_file
RESULT=/home/bambrozi/2_CORTISOL/HAPLOBLOCK/hapblock_in
plink \
--file ${DATA} \
--cow \
--recode HV \
--out ${RESULT}
The code above will generate a pair of files .ped and .info for each chromosome
I ran the haploview only for chromosome 03, 11, 14, 19, 20, 22, 23, 26 and 28 that are when the significant SNP are located.
As the LD Plot bring the SNP ordered, first I looked at /home/bambrozi/2_CORTISOL/GWAS/SNP_sig_Bonf_ind_seg_15.csv, to know the SNP position, and then compare with the Check Markers tab in the haploview SNP position x SNP# in plot
| VEP | rs | SNP | Chr | bp | A1 | A2 | Freq | Haploview.Order | Block |
|---|---|---|---|---|---|---|---|---|---|
| yes | rs109888380 | ARS-BFGL-NGS-4585 | 3 | 107160292 | A | C | 0.3353170 | 1818 | |
| yes | rs41624254 | BTA-122098-no-rs | 11 | 31882807 | A | C | 0.4444440 | 674 | |
| no | rs42217767 | BTB-01060598 | 11 | 32301594 | C | A | 0.4503970 | 680 | |
| yes | rs41632193 | Hapmap39823-BTA-35254 | 14 | 64061208 | A | C | 0.2916670 | 1196 | |
| yes | rs41632223 | UA-IFASA-7664 | 14 | 64299714 | C | A | 0.2678570 | 1200 | |
| yes | rs109751680 | ARS-BFGL-NGS-28901 | 19 | 42338148 | C | A | 0.3710320 | 713 | |
| yes | rs41577586 | ARS-BFGL-NGS-29119 | 19 | 49744592 | A | C | 0.4047620 | 849 | Block 38 |
| yes | rs42590312 | BTB-01471673 | 20 | 11545685 | A | C | 0.2460320 | 232 | |
| yes | rs110031217 | ARS-BFGL-NGS-23411 | 22 | 5134078 | A | C | 0.1865080 | 88 | |
| yes | rs110991998 | ARS-BFGL-NGS-26419 | 22 | 46052082 | A | C | 0.0813492 | 762 | |
| no | rs29022819 | Hapmap54016-rs29022819 | 23 | 33609998 | C | A | 0.1904760 | 534 | |
| yes | rs41644634 | BTA-62125-no-rs | 26 | 17899619 | C | A | 0.0257937 | 297 | Block 10 |
| yes | rs42089058 | BTB-00926636 | 26 | 17857480 | C | A | 0.0257937 | 296 | Block 10 |
| yes | rs109578343 | ARS-BFGL-NGS-11935 | 28 | 44942879 | A | C | 0.0297619 | 773 | |
| yes | rs41567074 | BTA-99379-no-rs | 28 | 41666186 | A | C | 0.0496032 | 701 | |
| yes | rs41648479 | Hapmap43005-BTA-64606 | 28 | 45805105 | A | C | 0.0297619 | 791 | Block 21 |
As can be observed in the column “Block”,four significant SNPs fell within a Haploblock.
rs41577586 (ARS-BFGL-NGS-29119) - chr 19 - order 849 - Block 38 rs41644634 (BTA-62125-no-rs) - chr 26 - order 297 - Block 10 rs42089058 (BTB-00926636) - chr 26 - order 296 - Block 10 rs41648479 (Hapmap43005-BTA-64606) - chr 28 - order 791 - Block 21
Chr 3 (SNP 1818)
Chr 11 (SNP 674 and
680)
Chr 14 (SNP 1196 and
1200)
Chr 19 (SNP 713)
Chr 19 (SNP 849)
Chr 20 (SNP 232)
Chr 22 (SNP 88)
Chr 22 (SNP 762)
Chr 23 (SNP 534)
Chr 26 (SNP 296 and
297)
Chr 28 (SNP 701)
Chr 28 (SNP 773 and
791)
I received from Umesh a e-mail informing the three categories that the animals could be sorted based on their cortisol concentration.
pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv", header = T)
ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)
# Create an matrix with fixed effects with only those animals which also have phenotype and genotype
data_final$cdn_id <- ids_eq$cdn_id[match(data_final$ID, ids_eq$elora_id)]
fixeff <- data_final[,c("ID", "BIRTH_YEAR", "Sampling_date", "cdn_id")]
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id, ]
fixeff$FID <- "HO"
fixeff <- fixeff[, c("FID", "cdn_id", "BIRTH_YEAR", "Sampling_date")]
# Now we are gona remove the intermediary animals from pheno object
pheno$Categorical <- ifelse(pheno$cortisol >= 956, "H",
ifelse(pheno$cortisol <= 190.8, "L", "M"))
table(pheno$Categorical)
pheno <- pheno[pheno$Categorical != "M", ]
pheno <- pheno[, c("FID", "cdn_id", "cortisol")]
# Now we are going to remove from fixeff the animals which are not in pheno
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id,]
#Checking if match the animals and order
identical(fixeff$cdn_id, pheno$cdn_id)
#Creating a file with animals to keep in the genotype file, we will use it on Plink
to_keep_geno <- pheno[, c("FID", "cdn_id")]
write.table(fixeff, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt", quote = F, row.names = F, col.names = T)
write.table(pheno, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt", quote = F, row.names = F, col.names = T)
write.table(to_keep_geno, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/to_keep_geno.txt", quote = F, row.names = F, col.names = F)
We ended up with
H (Hight) = 34 animals
L (Low) = 37 animals Total = 71 animals
On Plink we will remove all individuals from genotype files that are classified as Medium, keeping only the High and Low
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
KEEP=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/to_keep_geno.txt
plink2 --bfile ${DATA} --keep ${KEEP} --chr-set 29 --make-bed --out ${RESULT}
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/GWAS_result
PHENO=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt
FIXEFF=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt
/home/local/bin/gcta \
--bfile ${DATA} \
--mlma-loco \
--pheno ${PHENO} \
--qcovar ${FIXEFF} \
--autosome-num 29 \
--autosome \
--out ${RESULT}
After ran the GWAS above I got the following message from the GCTA:
Error: analysis stopped because more than half of the variance components are constrained. The result would be unreliable. You may have a try of the option –reml-no-constrain.
As we got this error message, we needed to solve this problem, and for that we used the whole sample size (252 individuals) to estimate the variance components in GCTA, and after this, using this variance components from the whole sample we perfomed ths GWAS with the subset of individuals (34 High + 37 Low = 71)
According to the BLUPF90+ tutorial:
ssGWAS is an iterative procedure with 2 steps:
0) Quality control
1) prediction of GEBV with ssGBLUP
2) prediction of SNP marker effects based on the GEBV
Preparing files to run Variance components estimation using REML with AI (Average Information) algorithm.
First you need to create a directory in your home directory, prepare and save the following files in:
The appearance of this file is like this:
FIRST COLUMN = Animal ID
SECOND COLUMN = Phenotype
THIRD COLUMN = Fixed Effect 1
FOURTH COLUMN = Fixed Effect 2
First we are going to generate a Phenotype_Fixed_Effect file with the whole sample (252 individuals) that we are going to use for the Variance Components Estimation.
To get in one file these four columns we need the following code:
fixeff <- read.table("/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/fixeff.txt", header = T)
pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)
fixeff <- fixeff[, c("cdn_id", "BIRTH_YEAR", "Sampling_date")]
pheno <- pheno[,c("cdn_id", "cortisol")]
# Load necessary libraries
library(dplyr)
# Merge pheno and fixeff data frames
merged_data <- pheno %>%
left_join(fixeff, by = "cdn_id")
merged_data$iid <- ids_eq$iid[match(merged_data$cdn_id, ids_eq$cdn_id)]
merged_data <- merged_data[, c("iid", "cortisol", "BIRTH_YEAR", "Sampling_date")]
write.table(merged_data, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/fenofix.txt", col.names = F, quote = F, row.names = F)
The file should be saved as text file, with separation by space and no columns names.
PS: if there are any NA, they sould be replaced by 9999
The appearance of this file is like this:
FIRST COLUMN = Animal ID
SECOND COLUMN = Sire ID
THIRD COLUMN = Dam ID
The file should be saved as text file, with separation by space and no columns names.
We used the code below to remove the commas of a .csv file to a file with sepation by spaces.
# to replace comma for space in the .csv file with the equivalence among IDs
sed -i 's/,/ /g' bruno_ids.csv
First we are going to generate a Genotype file with the whole sample (252 individuals) that we are going to use for the Variance Components Estimation.
The appearance of this file is like this:
FIRST COLUMN = Animal ID SECOND COLUMN = Genotypes (0, 1 and 2 format)
The file should be saved as text file, with separation by space and no columns names.
We used the code below to replace the cid for iid. First we merge using the second column of the firs file, and the first column of the second file. Then we use again the command awk to keep only the third and fifth columsn and sabe in a different object.
# Using the awk function to merge the two files and the second awk to select only the 3rd and 5fh columns
awk 'FNR==NR {a[$2]=$0; next} {print a[$1], $0}' bruno_ids.csv bruno_gntps.txt | awk '{print$3,$5}' > bruno_gntps_iid
mapfile <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/DGVsnpinfo.2404.ho", header = T)
colnames(mapfile)
colnames(mapfile) <- c("SNP_ID", "CHR", "POS")
mapfile <- mapfile[,c("CHR", "POS", "SNP_ID")]
write.table(mapfile, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/snpmap.txt", col.names = T, row.names = F, quote = F)
But, before running the GEBV we will first perform one additional step to “CLEAN” our genotypes. Actually BLUPF90 by default perform a data cleaning with pre set parameters, but as HWE is not used by default, we will perform this additional step.
The Parameter card for this step is the parameter bellow:
/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renum_QC.par
DATAFILE
fenofix.txt
TRAITS
2
FIELDS_PASSED TO OUTPUT
WEIGHT(S)
RESIDUAL_VARIANCE
1.0
EFFECT
3 cross numer
EFFECT
4 cross alpha
EFFECT
1 cross alpha
RANDOM
animal
FILE
bruno_ped_iid_blup.txt
FILE_POS
1 2 3 0 0
SNP_FILE
bruno_gntps_iid
PED_DEPTH
0
(CO)VARIANCES
1.0
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31
To run any softwere from Blupf90 suit we will perfom always in this way:
ssh grand
cd /home/bambrozi/2_CORTISOL/GWAS/BLUPF90
chmod +x renumf90
chmod +x blupf90+
./renumf90
When you run the code above, it will ask you the name of your parameter card, for this step is renum_QC.par.
The command above will generate couple files, among them renf90.par
We modified renf90.par in:The parameter card to perform the Quality Control is: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renf90_DataClean.par
It will be run using the software pre preGS90 to generate the Clean Genotype and SNP_MAP files after Quality Control.
# BLUPF90 parameter file created by RENUMF90
DATAFILE
renf90.dat
NUMBER_OF_TRAITS
1
NUMBER_OF_EFFECTS
3
OBSERVATION(S)
1
WEIGHT(S)
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
2 4 cross
3 23 cross
4 3724 cross
RANDOM_RESIDUAL VALUES
1.0000
RANDOM_GROUP
3
RANDOM_TYPE
add_an_upginb
FILE
renadd03.ped
(CO)VARIANCES
1.0000
OPTION SNP_file bruno_gntps_iid
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31
The second parameter card used for Variance Components Estimation (VCE) is the following: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/renf90_VarCompEst.par
It will be run using the software pre blupf90+ to generate the VCE.
# BLUPF90 parameter file created by RENUMF90
DATAFILE
renf90.dat
NUMBER_OF_TRAITS
1
NUMBER_OF_EFFECTS
3
OBSERVATION(S)
1
WEIGHT(S)
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
2 4 cross
3 23 cross
4 3724 cross
RANDOM_RESIDUAL VALUES
1.0000
RANDOM_GROUP
3
RANDOM_TYPE
add_an_upginb
FILE
renadd03.ped
(CO)VARIANCES
1.0000
OPTION SNP_file bruno_gntps_iid_clean
OPTION no_quality_control
OPTION method VCE
OPTION origID
OPTION missing 9999
OPTION se_covar_function H2_1 g_3_3_1_1/(g_3_3_1_1+r_1_1)
In the parameter card above, we remove the option for Quality Control and added options for Variance Components Estimation, for Missing data, for origID and for heritability estimation, but the MOST IMPORTANT PART is we need to change the OPTION SNP_FILE, replacing the original genotype file, for the “clean” version generated in the previous step.
The Variance Components will be placed in the file: /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/blupf90.log
blupf90.log
Now you should update you renf90_VarCompEst.par file with these informations from the .log file
Copy Residual Variance from blupf90.log and will paste on renf90_VarCompEst.par RANDOM_RESIDUAL_VALUES Copy Genetic variance for effect x from blupf90.log and will paste on renf90_VarCompEst.par (CO) VARIANCE
If the Residual Variance and Genetic variance for effect x didn’t change in your blupf90.log the analysis ended, but if this value vary, you should update again the renf90.par and run again blupf90+ until this values don’t change more.
blupf90.log
Parameter card
Now we’ll run a new analysis using the Variance Components Estimation from the previous step to perform the GWAS.
To perform this first we need a Phenotype file, and a Genotype file only with the 71 animals (High=34 and Low=37)
I used the code bellow to remove individuals with MEDIUM cortisol Phenotype
phenofix <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/fenofix.txt")
# Now we are gona remove the intermediary animals from pheno object
phenofix$Categorical <- ifelse(phenofix$V2 >= 956, "H",
ifelse(phenofix$V2 <= 190.8, "L", "M"))
table(phenofix$Categorical)
phenofix <- phenofix[phenofix$Categorical != "M", ]
phenofix <- phenofix[, c("V1", "V2", "V3", "V4")]
write.table(phenofix, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/fenofix.txt", col.names = F, row.names = F, quote = F)
I used this command line bellow to remove the individuals with MEDIUM phenotypes.
awk 'NR==FNR{ids[$1]; next} $1 in ids' fenofix.txt bruno_gntps_iid > bruno_gntps_iid_71
/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/renum_QC.par
DATAFILE
fenofix.txt
TRAITS
2
FIELDS_PASSED TO OUTPUT
WEIGHT(S)
RESIDUAL_VARIANCE
77182
EFFECT
3 cross numer
EFFECT
4 cross alpha
EFFECT
1 cross alpha
RANDOM
animal
FILE
bruno_ped_iid_blup.txt
FILE_POS
1 2 3 0 0
SNP_FILE
bruno_gntps_iid_71
PED_DEPTH
0
(CO)VARIANCES
28212
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31
We modified renf90.par in e copies:
To run the parameter card bellow we are going to use the software presGSf90 /home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/renf90_DataClean.par
# BLUPF90 parameter file created by RENUMF90
DATAFILE
renf90.dat
NUMBER_OF_TRAITS
1
NUMBER_OF_EFFECTS
3
OBSERVATION(S)
1
WEIGHT(S)
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
2 4 cross
3 22 cross
4 3724 cross
RANDOM_RESIDUAL VALUES
77182.
RANDOM_GROUP
3
RANDOM_TYPE
add_an_upginb
FILE
renadd03.ped
(CO)VARIANCES
28212.
OPTION SNP_file bruno_gntps_iid_71
OPTION outcallrate
OPTION saveCleanSNPs
OPTION minfreq 0.01
OPTION map_file snpmap.txt
OPTION excludeCHR 30 31
May be necessary to run the command bellow on the server Setting the stack size to “unlimited” allows the program to allocate memory for these large structures without hitting stack limits. By removing stack size limits, BLUPF90 is less likely to encounter segmentation faults or memory allocation issues that arise when the stack space is insufficient for the computations needed.
ulimit -s unlimited
The parameter card bellow we are going to run using the software blupf90+:
/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/renf90_ssGWAS1_Ginv.par
# BLUPF90 parameter file created by RENUMF90
DATAFILE
renf90.dat
NUMBER_OF_TRAITS
1
NUMBER_OF_EFFECTS
3
OBSERVATION(S)
1
WEIGHT(S)
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
2 4 cross
3 22 cross
4 3724 cross
RANDOM_RESIDUAL VALUES
77182.
RANDOM_GROUP
3
RANDOM_TYPE
add_an_upginb
FILE
renadd03.ped
(CO)VARIANCES
28212.
OPTION SNP_file bruno_gntps_iid_71_clean
OPTION no_quality_control
OPTION origID
OPTION missing 9999
OPTION saveGInverse
OPTION saveA22Inverse
OPTION snp_p_value
The parameter card bellow we are going to run using the software postGSf90:
# BLUPF90 parameter file created by RENUMF90
DATAFILE
renf90.dat
NUMBER_OF_TRAITS
1
NUMBER_OF_EFFECTS
3
OBSERVATION(S)
1
WEIGHT(S)
EFFECTS: POSITIONS_IN_DATAFILE NUMBER_OF_LEVELS TYPE_OF_EFFECT[EFFECT NESTED]
2 4 cross
3 22 cross
4 3724 cross
RANDOM_RESIDUAL VALUES
77182.
RANDOM_GROUP
3
RANDOM_TYPE
add_an_upginb
FILE
renadd03.ped
(CO)VARIANCES
28212.
OPTION SNP_file bruno_gntps_iid_71_clean
OPTION origID
OPTION no_quality_control
OPTION readGInverse
OPTION readA22Inverse
OPTION map_file snpmap.txt_clean
OPTION snp_p_value
OPTION Manhattan_plot_R
OPTION Manhattan_plot
This code will generate several files, among them:
To make the Manhattan Plot considering Genome Independent Segment we should run the code bellow. This code has part of the code in the file Pft1e3.R
Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")
library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
summarise(total_length = sum(`Seq length`)) %>%
pull(total_length)
# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8
# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)
NeL <- Ne*L_M
# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)
# Calculate Bonferroni threshold (already done)
bonf <- -log10(0.05 / Me)
setwd("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO")
# Read in and process data for Manhattan plot
yyy1 <- read.table("chrsnp_pval")
yyy <- yyy1[order(yyy1$V4), ]
zzz <- yyy[which(yyy$V1 == 1 & yyy$V2 == 3), ]
n <- nrow(zzz)
y <- zzz[, 4]
x <- zzz[, 3]
chr1 <- zzz[, 5]
chr <- NULL
pos <- NULL
for (i in unique(yyy$V5)) {
zz <- yyy[yyy$V5 == i, ]
key <- zz$V4
medio <- round(nrow(zz) / 2, 0)
z <- key[medio]
pos <- c(pos, z)
}
chrn <- unique(yyy$V5)
one <- which(chr1 %% 4 == 0)
two <- which(chr1 %% 4 == 1)
three <- which(chr1 %% 4 == 2)
four <- which(chr1 %% 4 == 3)
chr[one] <- "darkgoldenrod"
chr[two] <- "darkorchid"
chr[three] <- "blue"
chr[four] <- "forestgreen"
# Create Manhattan plot with Bonferroni line and legend
pdf(file = "Pft1e3_manplot_with_bonf_ind_seg.pdf", family = "sans", height = 27.8, width = 50, pointsize = 20, bg = "white")
par(mfrow = c(1, 1), family = "sans", cex = 1.5, font = 2)
plot(y, x, xaxt = "n", main = "Manhattan Plot SNP p_value - Trait: 1 Effect: 3", xlab = "", ylab = "-log10(p-value)", pch = 20, xlim = c(1, n), ylim = c(0, max(x)), col = chr)
# Add Bonferroni line
abline(h = bonf, col = "red", lwd = 2, lty = 2)
# Add legend for Bonferroni line
legend("topright", legend = "Bonferroni correction for genome independent segments", col = "red", lwd = 2, lty = 2, cex = 1)
axis(1, at = pos, labels = chrn)
dev.off()
For additional analysis like Variant Effect Prediction (VEP) we need the rsID, to get the rsID we use the software SNPChimp which requires SNP_names, but the BLUPF90 output have only the Chromosome and Position of the SNPs, so we are going to perfome these two steps to get one file with t he significant SNPs + SNP_name + rsID
For this analysis we have to build this new sheet bringing SNP ID from snpmap.txt
gwas = read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/chrsnp_pval")
colnames(gwas) <- c("V1", "V2", "LOG_P", "SNP", "CHR", "BP")
gwas <- filter(gwas, LOG_P >= bonf)
snpmap <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/snpmap.txt", header = T)
# Filter snpmap to only include rows that match the CHR and BP values in out_genes
filtered_snpmap <- snpmap[snpmap$CHR %in% gwas$CHR & snpmap$POS %in% gwas$BP, ]
# Merge the filtered snpmap with out_genes
gwas_snpname <- merge(gwas, filtered_snpmap[, c("CHR", "POS", "SNP_ID")],
by.x = c("CHR", "BP"), by.y = c("CHR", "POS"),
all.x = TRUE)
gwas_snpname <- gwas_snpname[,c("CHR", "BP", "LOG_P", "SNP_ID")]
write.csv(gwas_snpname, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/chrsnp_pval_SNPid_ind_seg_sig_BLUPF90.csv")
| X | CHR | BP | LOG_P | SNP_ID | rsID | X.1 | rs | SNP_name | chromosome | position |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 11 | 19779915 | 4.828190 | Hapmap55558-rs29013980 | NA | NA | rs110160157 | ARS-BFGL-NGS-12139 | 0 | 0 |
| 2 | 12 | 17477322 | 4.417576 | BTB-00488482 | NA | NA | rs42751504 | ARS-BFGL-NGS-37809 | 0 | 0 |
| 3 | 14 | 15929822 | 5.467439 | ARS-BFGL-NGS-82859 | NA | NA | rs43691687 | BTB-00488482 | 0 | 0 |
| 4 | 15 | 33066384 | 5.041271 | BTB-00594449 | NA | NA | rs42305073 | BTB-01148543 | 0 | 0 |
| 5 | 15 | 57930281 | 4.514377 | ARS-BFGL-NGS-30515 | NA | NA | rs29013980 | Hapmap55558-rs29013980 | 11 | 19803705 |
| 6 | 17 | 46487068 | 4.547766 | ARS-BFGL-NGS-12510 | NA | NA | rs43691687 | BTB-00488482 | 12 | 16563156 |
| 7 | 17 | 47884497 | 6.622354 | ARS-BFGL-NGS-87412 | NA | NA | rs110506037 | ARS-BFGL-NGS-82859 | 14 | 15419973 |
| 8 | 17 | 48800954 | 5.268940 | ARS-BFGL-NGS-112149 | NA | NA | rs41764450 | BTB-00594449 | 15 | 31708030 |
| 9 | 2 | 118820218 | 4.420962 | Hapmap53065-rs29026778 | NA | NA | rs41764450 | BTB-00594449 | 15 | 33601229 |
| 10 | 2 | 41602429 | 4.497977 | BTB-00096979 | NA | NA | rs110565206 | ARS-BFGL-NGS-30515 | 15 | 57514944 |
| 11 | 2 | 41670655 | 4.377284 | Hapmap48777-BTA-47434 | NA | NA | rs110565206 | ARS-BFGL-NGS-30515 | 15 | 58709858 |
| 12 | 20 | 60365668 | 4.603817 | BTB-01341053 | NA | NA | rs110038841 | ARS-BFGL-NGS-12510 | 17 | 47484553 |
| 13 | 20 | 65351653 | 4.891206 | ARS-BFGL-NGS-91119 | NA | NA | rs109273103 | ARS-BFGL-NGS-87412 | 17 | 48889044 |
| 14 | 21 | 4055731 | 4.493938 | ARS-BFGL-NGS-112210 | NA | NA | rs110038841 | ARS-BFGL-NGS-12510 | 17 | 48998486 |
| 15 | 23 | 44822126 | 4.316434 | ARS-BFGL-NGS-115605 | NA | NA | rs109276211 | ARS-BFGL-NGS-112149 | 17 | 49828442 |
| 16 | 24 | 857728 | 4.458018 | Hapmap47669-BTA-59022 | NA | NA | rs109273103 | ARS-BFGL-NGS-87412 | 17 | 50583981 |
| 17 | 28 | 20016672 | 5.130557 | ARS-BFGL-NGS-116552 | NA | NA | rs109276211 | ARS-BFGL-NGS-112149 | 17 | 51512244 |
| 18 | 28 | 35807366 | 4.695875 | ARS-BFGL-NGS-71077 | NA | NA | rs43305418 | BTB-00096979 | 2 | 41717655 |
| 19 | 29 | 33039863 | 4.933140 | ARS-BFGL-NGS-41631 | NA | NA | rs41636137 | Hapmap48777-BTA-47434 | 2 | 41786376 |
| 20 | 3 | 110866523 | 4.815943 | ARS-BFGL-NGS-118207 | NA | NA | rs43305418 | BTB-00096979 | 2 | 43072293 |
| 21 | 3 | 111526170 | 4.760351 | ARS-BFGL-NGS-74948 | NA | NA | rs41636137 | Hapmap48777-BTA-47434 | 2 | 43141034 |
| 22 | 3 | 111730561 | 4.390707 | BTB-01641394 | NA | NA | rs29026778 | Hapmap53065-rs29026778 | 2 | 119630462 |
| 23 | 3 | 111751663 | 4.390707 | ARS-BFGL-NGS-37809 | NA | NA | rs29026778 | Hapmap53065-rs29026778 | 2 | 122995670 |
| 24 | 3 | 111772736 | 5.088738 | ARS-BFGL-NGS-25298 | NA | NA | rs42462935 | BTB-01341053 | 20 | 60447656 |
| 25 | 4 | 103979600 | 5.591006 | ARS-BFGL-NGS-110705 | NA | NA | rs42462935 | BTB-01341053 | 20 | 64066021 |
| 26 | 4 | 107787202 | 5.375628 | ARS-BFGL-NGS-45265 | NA | NA | rs109575643 | ARS-BFGL-NGS-91119 | 20 | 65467968 |
| 27 | 4 | 109568557 | 4.709882 | Hapmap48062-BTA-72409 | NA | NA | rs109575643 | ARS-BFGL-NGS-91119 | 20 | 69096095 |
| 28 | 4 | 110053134 | 5.245598 | UA-IFASA-2147 | NA | NA | rs109631116 | ARS-BFGL-NGS-112210 | 21 | 2685189 |
| 29 | 4 | 24239851 | 4.641241 | Hapmap60681-rs29013301 | NA | NA | rs109631116 | ARS-BFGL-NGS-112210 | 21 | 4193189 |
| 30 | 4 | 25933587 | 4.352451 | Hapmap50554-BTA-107048 | NA | NA | rs42029843 | ARS-BFGL-NGS-115605 | 23 | 44680105 |
| 31 | 4 | 26511448 | 4.537662 | Hapmap59011-rs29027498 | NA | NA | rs42029843 | ARS-BFGL-NGS-115605 | 23 | 45514921 |
| 32 | 4 | 27021431 | 5.045447 | Hapmap59743-rs29017061 | NA | NA | rs41645754 | Hapmap47669-BTA-59022 | 24 | 595659 |
| 33 | 4 | 27094376 | 5.485063 | BTB-00170785 | NA | NA | rs41645754 | Hapmap47669-BTA-59022 | 24 | 1094942 |
| 34 | 4 | 50755462 | 5.015905 | ARS-BFGL-NGS-12139 | NA | NA | rs110428837 | ARS-BFGL-NGS-116552 | 28 | 18933788 |
| 35 | 4 | 7873471 | 4.610767 | Hapmap60503-rs29018741 | NA | NA | rs110428837 | ARS-BFGL-NGS-116552 | 28 | 20096355 |
| 36 | 4 | 95650788 | 4.825103 | Hapmap58854-rs29023486 | NA | NA | rs109584097 | ARS-BFGL-NGS-71077 | 28 | 35297722 |
| 37 | 6 | 36184467 | 4.758110 | Hapmap23854-BTC-062412 | NA | NA | rs109584097 | ARS-BFGL-NGS-71077 | 28 | 36010482 |
| 38 | 7 | 110306791 | 4.609691 | BTB-01148543 | NA | NA | rs110121846 | ARS-BFGL-NGS-41631 | 29 | 33593218 |
| NA | NA | NA | NA | NA | NA | rs110121846 | ARS-BFGL-NGS-41631 | 29 | 34563419 | |
| NA | NA | NA | NA | NA | NA | rs110081798 | ARS-BFGL-NGS-118207 | 3 | 111458159 | |
| NA | NA | NA | NA | NA | NA | rs41585925 | ARS-BFGL-NGS-74948 | 3 | 112135228 | |
| NA | NA | NA | NA | NA | NA | rs42752353 | BTB-01641394 | 3 | 112340357 | |
| NA | NA | NA | NA | NA | NA | rs42751504 | ARS-BFGL-NGS-37809 | 3 | 112361478 | |
| NA | NA | NA | NA | NA | NA | rs109868537 | ARS-BFGL-NGS-25298 | 3 | 112382551 | |
| NA | NA | NA | NA | NA | NA | rs110081798 | ARS-BFGL-NGS-118207 | 3 | 118126544 | |
| NA | NA | NA | NA | NA | NA | rs41585925 | ARS-BFGL-NGS-74948 | 3 | 118803985 | |
| NA | NA | NA | NA | NA | NA | rs42752353 | BTB-01641394 | 3 | 119008234 | |
| NA | NA | NA | NA | NA | NA | rs42751504 | ARS-BFGL-NGS-37809 | 3 | 119029352 | |
| NA | NA | NA | NA | NA | NA | rs29018741 | Hapmap60503-rs29018741 | 4 | 7779209 | |
| NA | NA | NA | NA | NA | NA | rs29018741 | Hapmap60503-rs29018741 | 4 | 8585313 | |
| NA | NA | NA | NA | NA | NA | rs29013301 | Hapmap60681-rs29013301 | 4 | 24183919 | |
| NA | NA | NA | NA | NA | NA | rs29013301 | Hapmap60681-rs29013301 | 4 | 25355321 | |
| NA | NA | NA | NA | NA | NA | rs41615935 | Hapmap50554-BTA-107048 | 4 | 25913130 | |
| NA | NA | NA | NA | NA | NA | rs29027498 | Hapmap59011-rs29027498 | 4 | 26490406 | |
| NA | NA | NA | NA | NA | NA | rs29017061 | Hapmap59743-rs29017061 | 4 | 26995149 | |
| NA | NA | NA | NA | NA | NA | rs43377276 | BTB-00170785 | 4 | 27067897 | |
| NA | NA | NA | NA | NA | NA | rs41615935 | Hapmap50554-BTA-107048 | 4 | 27162987 | |
| NA | NA | NA | NA | NA | NA | rs29027498 | Hapmap59011-rs29027498 | 4 | 27732337 | |
| NA | NA | NA | NA | NA | NA | rs29017061 | Hapmap59743-rs29017061 | 4 | 28239398 | |
| NA | NA | NA | NA | NA | NA | rs43377276 | BTB-00170785 | 4 | 28312330 | |
| NA | NA | NA | NA | NA | NA | rs110160157 | ARS-BFGL-NGS-12139 | 4 | 51047434 | |
| NA | NA | NA | NA | NA | NA | rs110160157 | ARS-BFGL-NGS-12139 | 4 | 52798744 | |
| NA | NA | NA | NA | NA | NA | rs29023486 | Hapmap58854-rs29023486 | 4 | 96436124 | |
| NA | NA | NA | NA | NA | NA | rs29023486 | Hapmap58854-rs29023486 | 4 | 98848662 | |
| NA | NA | NA | NA | NA | NA | rs110079750 | ARS-BFGL-NGS-110705 | 4 | 104816971 | |
| NA | NA | NA | NA | NA | NA | rs110079750 | ARS-BFGL-NGS-110705 | 4 | 107675833 | |
| NA | NA | NA | NA | NA | NA | rs110935391 | ARS-BFGL-NGS-45265 | 4 | 108608248 | |
| NA | NA | NA | NA | NA | NA | rs41566051 | Hapmap48062-BTA-72409 | 4 | 110354858 | |
| NA | NA | NA | NA | NA | NA | rs29012492 | UA-IFASA-2147 | 4 | 110839255 | |
| NA | NA | NA | NA | NA | NA | rs110935391 | ARS-BFGL-NGS-45265 | 4 | 111954676 | |
| NA | NA | NA | NA | NA | NA | rs41566051 | Hapmap48062-BTA-72409 | 4 | 113822987 | |
| NA | NA | NA | NA | NA | NA | rs29012492 | UA-IFASA-2147 | 4 | 114307093 | |
| NA | NA | NA | NA | NA | NA | rs81154019 | Hapmap23854-BTC-062412 | 6 | 37610880 |
After get the rsID from SNPCHIMP I produced this table with the code bellow:
rsid <- read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/SNPchimp_result_1723415732_BLUPF90GWAS.tsv", header = T)
merged <- merge(rsid, gwas_snpname, by.x ="SNP_name", by.y ="SNP_ID")
colnames(merged)
merged <- merged[,c("SNP_name", "rs", "CHR", "BP", "LOG_P")]
colnames(merged) <- c("SNP_name", "rsID", "CHR", "BP", "LOG_P")
write.csv(merged, "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/gwas_ind_seg_sig_SNPname_rsID.csv")
| X | SNP_name | rsID | CHR | BP | LOG_P |
|---|---|---|---|---|---|
| 1 | ARS-BFGL-NGS-110705 | rs110079750 | 4 | 103979600 | 5.591006 |
| 2 | ARS-BFGL-NGS-112149 | rs109276211 | 17 | 48800954 | 5.268940 |
| 3 | ARS-BFGL-NGS-112210 | rs109631116 | 21 | 4055731 | 4.493938 |
| 4 | ARS-BFGL-NGS-115605 | rs42029843 | 23 | 44822126 | 4.316434 |
| 5 | ARS-BFGL-NGS-116552 | rs110428837 | 28 | 20016672 | 5.130557 |
| 6 | ARS-BFGL-NGS-118207 | rs110081798 | 3 | 110866523 | 4.815943 |
| 7 | ARS-BFGL-NGS-12139 | rs110160157 | 4 | 50755462 | 5.015905 |
| 8 | ARS-BFGL-NGS-12510 | rs110038841 | 17 | 46487068 | 4.547766 |
| 9 | ARS-BFGL-NGS-25298 | rs109868537 | 3 | 111772736 | 5.088738 |
| 10 | ARS-BFGL-NGS-30515 | rs110565206 | 15 | 57930281 | 4.514377 |
| 11 | ARS-BFGL-NGS-37809 | rs42751504 | 3 | 111751663 | 4.390707 |
| 12 | ARS-BFGL-NGS-41631 | rs110121846 | 29 | 33039863 | 4.933140 |
| 13 | ARS-BFGL-NGS-45265 | rs110935391 | 4 | 107787202 | 5.375628 |
| 14 | ARS-BFGL-NGS-71077 | rs109584097 | 28 | 35807366 | 4.695875 |
| 15 | ARS-BFGL-NGS-74948 | rs41585925 | 3 | 111526170 | 4.760351 |
| 16 | ARS-BFGL-NGS-82859 | rs110506037 | 14 | 15929822 | 5.467439 |
| 17 | ARS-BFGL-NGS-87412 | rs109273103 | 17 | 47884497 | 6.622354 |
| 18 | ARS-BFGL-NGS-91119 | rs109575643 | 20 | 65351653 | 4.891206 |
| 19 | BTB-00096979 | rs43305418 | 2 | 41602429 | 4.497977 |
| 20 | BTB-00170785 | rs43377276 | 4 | 27094376 | 5.485063 |
| 21 | BTB-00488482 | rs43691687 | 12 | 17477322 | 4.417576 |
| 22 | BTB-00594449 | rs41764450 | 15 | 33066384 | 5.041271 |
| 23 | BTB-01148543 | rs42305073 | 7 | 110306791 | 4.609691 |
| 24 | BTB-01341053 | rs42462935 | 20 | 60365668 | 4.603817 |
| 25 | BTB-01641394 | rs42752353 | 3 | 111730561 | 4.390707 |
| 26 | Hapmap23854-BTC-062412 | rs81154019 | 6 | 36184467 | 4.758110 |
| 27 | Hapmap47669-BTA-59022 | rs41645754 | 24 | 857728 | 4.458018 |
| 28 | Hapmap48062-BTA-72409 | rs41566051 | 4 | 109568557 | 4.709882 |
| 29 | Hapmap48777-BTA-47434 | rs41636137 | 2 | 41670655 | 4.377284 |
| 30 | Hapmap50554-BTA-107048 | rs41615935 | 4 | 25933587 | 4.352451 |
| 31 | Hapmap53065-rs29026778 | rs29026778 | 2 | 118820218 | 4.420962 |
| 32 | Hapmap55558-rs29013980 | rs29013980 | 11 | 19779915 | 4.828190 |
| 33 | Hapmap58854-rs29023486 | rs29023486 | 4 | 95650788 | 4.825103 |
| 34 | Hapmap59011-rs29027498 | rs29027498 | 4 | 26511448 | 4.537662 |
| 35 | Hapmap59743-rs29017061 | rs29017061 | 4 | 27021431 | 5.045447 |
| 36 | Hapmap60503-rs29018741 | rs29018741 | 4 | 7873471 | 4.610767 |
| 37 | Hapmap60681-rs29013301 | rs29013301 | 4 | 24239851 | 4.641241 |
| 38 | UA-IFASA-2147 | rs29012492 | 4 | 110053134 | 5.245598 |
To build a Manhattan Plot using the plain Bonferroni we need to recalculate the object bonf.
# Calculate Bonferroni threshold (already done)
nsnps <- nrow(yyy)
bonf <- -log10(0.05 / nsnps)
# Create Manhattan plot with Bonferroni line and legend
pdf(file = "Pft1e3_manplot_with_plain_bonf.pdf", family = "sans", height = 27.8, width = 50, pointsize = 20, bg = "white")
par(mfrow = c(1, 1), family = "sans", cex = 1.5, font = 2)
plot(y, x, xaxt = "n", main = "Manhattan Plot SNP p_value - Trait: 1 Effect: 3", xlab = "", ylab = "-log10(p-value)", pch = 20, xlim = c(1, n), ylim = c(0, max(x)), col = chr)
# Add Bonferroni line
abline(h = bonf, col = "red", lwd = 2, lty = 2)
# Add legend for Bonferroni line
legend("topright", legend = "Bonferroni correction for genome independent segments", col = "red", lwd = 2, lty = 2, cex = 1)
axis(1, at = pos, labels = chrn)
dev.off()
Finally, to correct for multiple testing using FDR
yyy1$bh <- p.adjust(yyy1$p_value, method = "BH")
yyy1$p_value <- 10^(-yyy1$V3)
yyy1$bh <- p.adjust(yyy1$p_value, method = "BH")
SNP_sig_BH <- filter(yyy1, bh < 0.05)
write.table(SNP_sig_BH, file = "/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/SNP_sig_BH.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
colnames(yyy1) <- c("V1", "V2", "logP", "V4", "Chr", "bp", "p", "bh")
gwas <- yyy1
# Prepare the dataset
don <- gwas %>%
group_by(Chr) %>%
summarise(chr_len = max(bp)) %>%
mutate(tot = cumsum(as.numeric(chr_len)) - as.numeric(chr_len)) %>%
select(-chr_len) %>%
left_join(gwas, ., by = c("Chr" = "Chr")) %>%
arrange(Chr, bp) %>%
mutate(BPcum = as.numeric(bp) + tot) %>%
mutate(is_highlight = ifelse(bh < 0.05, "yes", "no")) %>%
mutate(is_annotate = ifelse(bh < 0.05, "yes", "no"))
# Remove rows with missing BPcum values
don <- don %>% filter(!is.na(BPcum))
# Prepare X axis
axisdf <- don %>%
group_by(Chr) %>%
summarize(center = (max(BPcum) + min(BPcum)) / 2)
# Find the maximum logP value for setting y-axis limits
max_logP <- max(don$logP, na.rm = TRUE)
# Make the plot
ggplot(don, aes(x = BPcum, y = logP)) +
geom_point(aes(color = as.factor(Chr)), alpha = 0.8, size = 1.3) +
scale_color_manual(values = rep(c("grey", "skyblue"), 22)) +
scale_x_continuous(label = axisdf$Chr, breaks = axisdf$center) +
scale_y_continuous(expand = c(0, 0), limits = c(0, max_logP + 1)) + # Set y-axis limit
geom_point(data = subset(don, is_highlight == "yes"), color = "orange", size = 2) +
theme_bw() +
theme(
legend.position = "none",
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
) +
labs(title = "GWAS Cortisol", x = "Chromosomes", y = "-log10(p)")
Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")
library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
summarise(total_length = sum(`Seq length`)) %>%
pull(total_length)
# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8
# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)
NeL <- Ne*L_M
# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)
# Calculate Bonferroni threshold (already done)
bonf <- -log10(0.05 / Me)
# GALLO
#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")
#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")
#import MARKER files = the GWAS output
gwas = read.table("/home/bambrozi/2_CORTISOL/GWAS/BLUPF90/EXTREME_PHENO/chrsnp_pval")
colnames(gwas) <- c("V1", "V2", "LOG_P", "SNP", "CHR", "BP")
gwas <- filter(gwas, LOG_P >= bonf)
#FINDING GENES AND QTLs ARROUND THE MARKER
#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2,
marker_file= gwas,
method = "gene",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_genes_50k.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_genes_50k.csv")
#FINDING QTLs
out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2,
marker_file= gwas,
method = "qtl",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_qtl_50k.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_qtl_50k_clean.csv")
The GALLO output are bellow:
For GENES
| X | V1 | V2 | LOG_P | SNP | CHR | BP | chr | start_pos | end_pos | width | strand | gene_id | gene_name | gene_biotype |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 1 | 3 | 4.420962 | 4632 | 2 | 118820218 | 2 | 118803760 | 118824701 | 20942 | + | ENSBTAG00000030718 | SPATA3 | protein_coding |
| 2 | 1 | 3 | 4.420962 | 4632 | 2 | 118820218 | 2 | 118831062 | 118836777 | 5716 | + | ENSBTAG00000054626 | NA | lncRNA |
| 3 | 1 | 3 | 4.420962 | 4632 | 2 | 118820218 | 2 | 118846915 | 118854132 | 7218 | + | ENSBTAG00000026710 | C2H2orf72 | protein_coding |
| 4 | 1 | 3 | 4.420962 | 4632 | 2 | 118820218 | 2 | 118865174 | 118946608 | 81435 | + | ENSBTAG00000005119 | PSMD1 | protein_coding |
| 5 | 1 | 3 | 4.377284 | 3498 | 2 | 41670655 | 2 | 41676135 | 42259624 | 583490 | - | ENSBTAG00000005562 | GALNT13 | protein_coding |
| 6 | 1 | 3 | 4.815943 | 6862 | 3 | 110866523 | 3 | 110769753 | 110827938 | 58186 | + | ENSBTAG00000013867 | DLGAP3 | protein_coding |
| 7 | 1 | 3 | 4.815943 | 6862 | 3 | 110866523 | 3 | 110835276 | 110839536 | 4261 | + | ENSBTAG00000014235 | SMIM12 | protein_coding |
| 8 | 1 | 3 | 4.815943 | 6862 | 3 | 110866523 | 3 | 110888198 | 110890904 | 2707 | - | ENSBTAG00000013881 | GJA4 | protein_coding |
| 9 | 1 | 3 | 4.815943 | 6862 | 3 | 110866523 | 3 | 110900183 | 110905849 | 5667 | - | ENSBTAG00000012584 | GJB3 | protein_coding |
| 10 | 1 | 3 | 4.760351 | 6873 | 3 | 111526170 | 3 | 111552563 | 111568286 | 15724 | - | ENSBTAG00000020798 | C3H1orf94 | protein_coding |
| 11 | 1 | 3 | 4.390707 | 6880 | 3 | 111730561 | 3 | 111603940 | 112289188 | 685249 | + | ENSBTAG00000005784 | CSMD2 | protein_coding |
| 12 | 1 | 3 | 4.390707 | 6881 | 3 | 111751663 | 3 | 111603940 | 112289188 | 685249 | + | ENSBTAG00000005784 | CSMD2 | protein_coding |
| 13 | 1 | 3 | 5.088738 | 6882 | 3 | 111772736 | 3 | 111603940 | 112289188 | 685249 | + | ENSBTAG00000005784 | CSMD2 | protein_coding |
| 14 | 1 | 3 | 4.537662 | 7515 | 4 | 26511448 | 4 | 26324012 | 26469689 | 145678 | - | ENSBTAG00000014074 | SNX13 | protein_coding |
| 15 | 1 | 3 | 5.485063 | 7526 | 4 | 27094376 | 4 | 27093041 | 27667961 | 574921 | + | ENSBTAG00000003808 | HDAC9 | protein_coding |
| 16 | 1 | 3 | 4.825103 | 8704 | 4 | 95650788 | 4 | 95631850 | 95631921 | 72 | - | ENSBTAG00000045386 | bta-mir-320b | miRNA |
| 17 | 1 | 3 | 5.591006 | 8831 | 4 | 103979600 | 4 | 103877533 | 103947944 | 70412 | - | ENSBTAG00000004799 | DENND2A | protein_coding |
| 18 | 1 | 3 | 5.591006 | 8831 | 4 | 103979600 | 4 | 103994401 | 104008963 | 14563 | + | ENSBTAG00000020484 | ADCK2 | protein_coding |
| 19 | 1 | 3 | 5.591006 | 8831 | 4 | 103979600 | 4 | 104013524 | 104019157 | 5634 | + | ENSBTAG00000021759 | NDUFB2 | protein_coding |
| 20 | 1 | 3 | 5.591006 | 8831 | 4 | 103979600 | 4 | 104027627 | 104194547 | 166921 | - | ENSBTAG00000021761 | BRAF | protein_coding |
| 21 | 1 | 3 | 5.375628 | 8888 | 4 | 107787202 | 4 | 107701708 | 108020989 | 319282 | + | ENSBTAG00000054424 | NA | protein_coding |
| 22 | 1 | 3 | 5.375628 | 8888 | 4 | 107787202 | 4 | 107718449 | 108101163 | 382715 | - | ENSBTAG00000017676 | TPK1 | protein_coding |
| 23 | 1 | 3 | 4.709882 | 8904 | 4 | 109568557 | 4 | 109526182 | 109526350 | 169 | - | ENSBTAG00000053666 | NA | misc_RNA |
| 24 | 1 | 3 | 4.610767 | 7184 | 4 | 7873471 | 4 | 7867490 | 7867588 | 99 | - | ENSBTAG00000045389 | U6 | snRNA |
| 25 | 1 | 3 | 5.015905 | 7910 | 4 | 50755462 | 4 | 50743789 | 50957593 | 213805 | - | ENSBTAG00000006589 | CFTR | protein_coding |
| 26 | 1 | 3 | 4.758110 | 11511 | 6 | 36184467 | 6 | 36052150 | 36198478 | 146329 | - | ENSBTAG00000010120 | HERC3 | protein_coding |
| 27 | 1 | 3 | 4.609691 | 14772 | 7 | 110306791 | 7 | 110296879 | 110565532 | 268654 | + | ENSBTAG00000035662 | CAMK4 | protein_coding |
| 28 | 1 | 3 | 4.828190 | 20558 | 11 | 19779915 | 11 | 19680235 | 19765027 | 84793 | - | ENSBTAG00000001114 | PRKD3 | protein_coding |
| 29 | 1 | 3 | 4.828190 | 20558 | 11 | 19779915 | 11 | 19792170 | 19822758 | 30589 | + | ENSBTAG00000013923 | QPCT | protein_coding |
| 30 | 1 | 3 | 5.467439 | 25167 | 14 | 15929822 | 14 | 15865228 | 15995482 | 130255 | - | ENSBTAG00000013537 | FER1L6 | protein_coding |
| 31 | 1 | 3 | 5.467439 | 25167 | 14 | 15929822 | 14 | 15914441 | 15917042 | 2602 | + | ENSBTAG00000048373 | NA | protein_coding |
| 32 | 1 | 3 | 5.041271 | 26782 | 15 | 33066384 | 15 | 32820702 | 33050885 | 230184 | - | ENSBTAG00000054820 | NA | lncRNA |
| 33 | 1 | 3 | 4.547766 | 29742 | 17 | 46487068 | 17 | 46406767 | 46715519 | 308753 | + | ENSBTAG00000009797 | RIMBP2 | protein_coding |
| 34 | 1 | 3 | 6.622354 | 29772 | 17 | 47884497 | 17 | 47426664 | 48084896 | 658233 | + | ENSBTAG00000002950 | TMEM132D | protein_coding |
| 35 | 1 | 3 | 5.268940 | 29784 | 17 | 48800954 | 17 | 48417339 | 48868560 | 451222 | - | ENSBTAG00000046256 | TMEM132C | protein_coding |
| 36 | 1 | 3 | 4.891206 | 33665 | 20 | 65351653 | 20 | 65311530 | 65343117 | 31588 | - | ENSBTAG00000009401 | MTRR | protein_coding |
| 37 | 1 | 3 | 4.891206 | 33665 | 20 | 65351653 | 20 | 65343172 | 65354775 | 11604 | + | ENSBTAG00000009400 | FASTKD3 | protein_coding |
| 38 | 1 | 3 | 4.891206 | 33665 | 20 | 65351653 | 20 | 65368557 | 65384042 | 15486 | + | ENSBTAG00000055034 | CFAP90 | protein_coding |
| 39 | 1 | 3 | 4.891206 | 33665 | 20 | 65351653 | 20 | 65388705 | 65842986 | 454282 | - | ENSBTAG00000019210 | ADCY2 | protein_coding |
| 40 | 1 | 3 | 4.493938 | 33836 | 21 | 4055731 | 21 | 3866347 | 4146441 | 280095 | - | ENSBTAG00000013422 | GABRB3 | protein_coding |
| 41 | 1 | 3 | 4.316434 | 36688 | 23 | 44822126 | 23 | 44802116 | 44835671 | 33556 | - | ENSBTAG00000010256 | TMEM170B | protein_coding |
| 42 | 1 | 3 | 4.458018 | 36876 | 24 | 857728 | 24 | 779627 | 866891 | 87265 | - | ENSBTAG00000000656 | NFATC1 | protein_coding |
| 43 | 1 | 3 | 4.458018 | 36876 | 24 | 857728 | 24 | 879074 | 1029386 | 150313 | - | ENSBTAG00000001224 | ATP9B | protein_coding |
| 44 | 1 | 3 | 4.695875 | 41057 | 28 | 35807366 | 28 | 35805434 | 35823873 | 18440 | + | ENSBTAG00000021416 | PRXL2A | protein_coding |
| 45 | 1 | 3 | 4.695875 | 41057 | 28 | 35807366 | 28 | 35848163 | 35904370 | 56208 | + | ENSBTAG00000003907 | TSPAN14 | protein_coding |
| 46 | 1 | 3 | 4.933140 | 41785 | 29 | 33039863 | 29 | 32993975 | 33024632 | 30658 | - | ENSBTAG00000003176 | JAM3 | protein_coding |
| 47 | 1 | 3 | 4.933140 | 41785 | 29 | 33039863 | 29 | 33036823 | 33044208 | 7386 | - | ENSBTAG00000052082 | NA | lncRNA |
FOR QTLs
| X | CHR | SNP | BP | QTL_type | start_pos | end_pos | QTL_ID |
|---|---|---|---|---|---|---|---|
| 1 | 2 | 3497 | 41602429 | Health | 41584083 | 41584087 | 32371 |
| 2 | 2 | 3497 | 41602429 | Milk | 41646649 | 41646653 | 113699 |
| 3 | 2 | 3498 | 41670655 | Milk | 41646649 | 41646653 | 113699 |
| 4 | 2 | 3498 | 41670655 | Milk | 41668170 | 41668174 | 113700 |
| 5 | 2 | 3498 | 41670655 | Meat_and_Carcass | 41676143 | 41676147 | 154062 |
| 6 | 2 | 3498 | 41670655 | Reproduction | 41693862 | 41693866 | 29899 |
| 7 | 2 | 4632 | 118820218 | Milk | 118779183 | 118779187 | 200902 |
| 8 | 2 | 4632 | 118820218 | Milk | 118779985 | 118779989 | 202500 |
| 9 | 2 | 4632 | 118820218 | Milk | 118781186 | 118781190 | 202905 |
| 10 | 2 | 4632 | 118820218 | Milk | 118785252 | 118785256 | 201638 |
| 11 | 2 | 4632 | 118820218 | Milk | 118789209 | 118789213 | 202885 |
| 12 | 2 | 4632 | 118820218 | Reproduction | 118820216 | 118820220 | 39599 |
| 13 | 2 | 4632 | 118820218 | Reproduction | 118820216 | 118820220 | 39600 |
| 14 | 2 | 4632 | 118820218 | Reproduction | 118820216 | 118820220 | 39601 |
| 15 | 2 | 4632 | 118820218 | Exterior | 118820216 | 118820220 | 39602 |
| 16 | 2 | 4632 | 118820218 | Exterior | 118820216 | 118820220 | 39603 |
| 17 | 2 | 4632 | 118820218 | Milk | 118820216 | 118820220 | 39604 |
| 18 | 2 | 4632 | 118820218 | Production | 118820216 | 118820220 | 39605 |
| 19 | 2 | 4632 | 118820218 | Exterior | 118820216 | 118820220 | 39606 |
| 20 | 2 | 4632 | 118820218 | Milk | 118820216 | 118820220 | 39607 |
| 21 | 2 | 4632 | 118820218 | Production | 118820216 | 118820220 | 39608 |
| 22 | 2 | 4632 | 118820218 | Production | 118820216 | 118820220 | 39609 |
| 23 | 2 | 4632 | 118820218 | Exterior | 118820216 | 118820220 | 39610 |
| 24 | 2 | 4632 | 118820218 | Exterior | 118820216 | 118820220 | 39611 |
| 25 | 2 | 4632 | 118820218 | Production | 118820216 | 118820220 | 39612 |
| 26 | 2 | 4632 | 118820218 | Reproduction | 118820216 | 118820220 | 39613 |
| 27 | 2 | 4632 | 118820218 | Health | 118820216 | 118820220 | 39614 |
| 28 | 2 | 4632 | 118820218 | Exterior | 118820216 | 118820220 | 39615 |
| 29 | 2 | 4632 | 118820218 | Exterior | 118820216 | 118820220 | 39616 |
| 30 | 2 | 4632 | 118820218 | Reproduction | 118820216 | 118820220 | 125235 |
| 31 | 3 | 6862 | 110866523 | Meat_and_Carcass | 110866521 | 110866525 | 152332 |
| 32 | 3 | 6873 | 111526170 | Health | 111487002 | 111487006 | 137202 |
| 33 | 3 | 6873 | 111526170 | Health | 111487002 | 111487006 | 170274 |
| 34 | 3 | 6873 | 111526170 | Health | 111509252 | 111509256 | 170275 |
| 35 | 3 | 6873 | 111526170 | Health | 111515878 | 111515882 | 170276 |
| 36 | 3 | 6880 | 111730561 | Reproduction | 111708234 | 111708238 | 30008 |
| 37 | 3 | 6881 | 111751663 | Reproduction | 111708234 | 111708238 | 30008 |
| 38 | 3 | 6880 | 111730561 | Reproduction | 111708234 | 111708238 | 30244 |
| 39 | 3 | 6881 | 111751663 | Reproduction | 111708234 | 111708238 | 30244 |
| 40 | 3 | 6880 | 111730561 | Meat_and_Carcass | 111708234 | 111708238 | 152258 |
| 41 | 3 | 6881 | 111751663 | Meat_and_Carcass | 111708234 | 111708238 | 152258 |
| 42 | 3 | 6880 | 111730561 | Meat_and_Carcass | 111717521 | 111717525 | 225527 |
| 43 | 3 | 6881 | 111751663 | Meat_and_Carcass | 111717521 | 111717525 | 225527 |
| 44 | 4 | 7184 | 7873471 | Milk | 7839854 | 7839858 | 118382 |
| 45 | 4 | 7184 | 7873471 | Milk | 7851689 | 7851693 | 118383 |
| 46 | 4 | 7184 | 7873471 | Milk | 7878682 | 7878686 | 113676 |
| 47 | 4 | 7184 | 7873471 | Milk | 7878682 | 7878686 | 118072 |
| 48 | 4 | 7471 | 24239851 | Meat_and_Carcass | 24248260 | 24248264 | 228128 |
| 49 | 4 | 7471 | 24239851 | Reproduction | 24279008 | 24279012 | 212516 |
| 50 | 4 | 7471 | 24239851 | Meat_and_Carcass | 24288010 | 24288014 | 107768 |
| 51 | 4 | 7471 | 24239851 | Meat_and_Carcass | 24288010 | 24288014 | 107796 |
| 52 | 4 | 7515 | 26511448 | Exterior | 26511446 | 26511450 | 66124 |
| 53 | 4 | 7524 | 27021431 | Production | 27014514 | 27014518 | 65173 |
| 54 | 4 | 7910 | 50755462 | Meat_and_Carcass | 50781007 | 50781011 | 228112 |
| 55 | 4 | 8831 | 103979600 | Health | 103949847 | 103949851 | 96218 |
| 56 | 4 | 8831 | 103979600 | Meat_and_Carcass | 104013536 | 104013540 | 231600 |
| 57 | 4 | 8888 | 107787202 | Reproduction | 107749162 | 107749166 | 106782 |
| 58 | 6 | 11511 | 36184467 | Milk | 36141496 | 36141500 | 244115 |
| 59 | 6 | 11511 | 36184467 | Milk | 36141496 | 36141500 | 246435 |
| 60 | 6 | 11511 | 36184467 | Milk | 36141496 | 36141500 | 250700 |
| 61 | 6 | 11511 | 36184467 | Production | 36143407 | 36143411 | 190189 |
| 62 | 6 | 11511 | 36184467 | Production | 36143929 | 36143933 | 187391 |
| 63 | 6 | 11511 | 36184467 | Production | 36145023 | 36145027 | 183480 |
| 64 | 6 | 11511 | 36184467 | Production | 36145023 | 36145027 | 190272 |
| 65 | 6 | 11511 | 36184467 | Production | 36145254 | 36145258 | 182513 |
| 66 | 6 | 11511 | 36184467 | Production | 36145254 | 36145258 | 186692 |
| 67 | 6 | 11511 | 36184467 | Production | 36147416 | 36147420 | 187702 |
| 68 | 6 | 11511 | 36184467 | Production | 36147705 | 36147709 | 186912 |
| 69 | 6 | 11511 | 36184467 | Production | 36148373 | 36148377 | 182603 |
| 70 | 6 | 11511 | 36184467 | Production | 36148373 | 36148377 | 188134 |
| 71 | 6 | 11511 | 36184467 | Production | 36149039 | 36149043 | 186551 |
| 72 | 6 | 11511 | 36184467 | Production | 36150874 | 36150878 | 183765 |
| 73 | 6 | 11511 | 36184467 | Production | 36150874 | 36150878 | 186794 |
| 74 | 6 | 11511 | 36184467 | Production | 36151323 | 36151327 | 183528 |
| 75 | 6 | 11511 | 36184467 | Production | 36151323 | 36151327 | 187598 |
| 76 | 6 | 11511 | 36184467 | Production | 36153011 | 36153015 | 188352 |
| 77 | 6 | 11511 | 36184467 | Production | 36154453 | 36154457 | 188950 |
| 78 | 6 | 11511 | 36184467 | Production | 36156604 | 36156608 | 187155 |
| 79 | 6 | 11511 | 36184467 | Production | 36156892 | 36156896 | 183350 |
| 80 | 6 | 11511 | 36184467 | Production | 36156892 | 36156896 | 187196 |
| 81 | 6 | 11511 | 36184467 | Production | 36157620 | 36157624 | 23700 |
| 82 | 6 | 11511 | 36184467 | Production | 36157620 | 36157624 | 66352 |
| 83 | 6 | 11511 | 36184467 | Production | 36157620 | 36157624 | 66353 |
| 84 | 6 | 11511 | 36184467 | Production | 36157620 | 36157624 | 66354 |
| 85 | 6 | 11511 | 36184467 | Production | 36157620 | 36157624 | 66355 |
| 86 | 6 | 11511 | 36184467 | Production | 36157620 | 36157624 | 67222 |
| 87 | 6 | 11511 | 36184467 | Production | 36157620 | 36157624 | 67223 |
| 88 | 6 | 11511 | 36184467 | Production | 36157620 | 36157624 | 164068 |
| 89 | 6 | 11511 | 36184467 | Production | 36157620 | 36157624 | 164167 |
| 90 | 6 | 11511 | 36184467 | Meat_and_Carcass | 36157620 | 36157624 | 164343 |
| 91 | 6 | 11511 | 36184467 | Production | 36157620 | 36157624 | 164369 |
| 92 | 6 | 11511 | 36184467 | Production | 36157620 | 36157624 | 164488 |
| 93 | 6 | 11511 | 36184467 | Production | 36157620 | 36157624 | 183351 |
| 94 | 6 | 11511 | 36184467 | Production | 36157620 | 36157624 | 186777 |
| 95 | 6 | 11511 | 36184467 | Production | 36157986 | 36157990 | 187308 |
| 96 | 6 | 11511 | 36184467 | Production | 36158359 | 36158363 | 187062 |
| 97 | 6 | 11511 | 36184467 | Production | 36158498 | 36158502 | 186952 |
| 98 | 6 | 11511 | 36184467 | Production | 36159606 | 36159610 | 183352 |
| 99 | 6 | 11511 | 36184467 | Production | 36159606 | 36159610 | 187046 |
| 100 | 6 | 11511 | 36184467 | Production | 36160694 | 36160698 | 187096 |
| 101 | 6 | 11511 | 36184467 | Production | 36161273 | 36161277 | 187575 |
| 102 | 6 | 11511 | 36184467 | Production | 36161375 | 36161379 | 186498 |
| 103 | 6 | 11511 | 36184467 | Production | 36161625 | 36161629 | 187580 |
| 104 | 6 | 11511 | 36184467 | Production | 36161633 | 36161637 | 187885 |
| 105 | 6 | 11511 | 36184467 | Production | 36162950 | 36162954 | 187283 |
| 106 | 6 | 11511 | 36184467 | Production | 36163340 | 36163344 | 187023 |
| 107 | 6 | 11511 | 36184467 | Production | 36163428 | 36163432 | 186547 |
| 108 | 6 | 11511 | 36184467 | Reproduction | 36163939 | 36163943 | 14703 |
| 109 | 6 | 11511 | 36184467 | Reproduction | 36163939 | 36163943 | 14707 |
| 110 | 6 | 11511 | 36184467 | Production | 36163939 | 36163943 | 189833 |
| 111 | 6 | 11511 | 36184467 | Production | 36164020 | 36164024 | 189834 |
| 112 | 6 | 11511 | 36184467 | Production | 36164564 | 36164568 | 187124 |
| 113 | 6 | 11511 | 36184467 | Production | 36165001 | 36165005 | 186965 |
| 114 | 6 | 11511 | 36184467 | Production | 36165137 | 36165141 | 187629 |
| 115 | 6 | 11511 | 36184467 | Production | 36165150 | 36165154 | 188219 |
| 116 | 6 | 11511 | 36184467 | Production | 36165218 | 36165222 | 186478 |
| 117 | 6 | 11511 | 36184467 | Production | 36165305 | 36165309 | 187300 |
| 118 | 6 | 11511 | 36184467 | Production | 36165645 | 36165649 | 186485 |
| 119 | 6 | 11511 | 36184467 | Production | 36166108 | 36166112 | 187795 |
| 120 | 6 | 11511 | 36184467 | Production | 36166178 | 36166182 | 187257 |
| 121 | 6 | 11511 | 36184467 | Production | 36166205 | 36166209 | 187129 |
| 122 | 6 | 11511 | 36184467 | Production | 36166298 | 36166302 | 187368 |
| 123 | 6 | 11511 | 36184467 | Reproduction | 36166503 | 36166507 | 14704 |
| 124 | 6 | 11511 | 36184467 | Milk | 36166503 | 36166507 | 105821 |
| 125 | 6 | 11511 | 36184467 | Production | 36166503 | 36166507 | 189279 |
| 126 | 6 | 11511 | 36184467 | Production | 36166791 | 36166795 | 189278 |
| 127 | 6 | 11511 | 36184467 | Production | 36166795 | 36166799 | 182735 |
| 128 | 6 | 11511 | 36184467 | Production | 36166795 | 36166799 | 189277 |
| 129 | 6 | 11511 | 36184467 | Production | 36167325 | 36167329 | 183190 |
| 130 | 6 | 11511 | 36184467 | Production | 36167325 | 36167329 | 186569 |
| 131 | 6 | 11511 | 36184467 | Production | 36167828 | 36167832 | 183191 |
| 132 | 6 | 11511 | 36184467 | Production | 36167828 | 36167832 | 188015 |
| 133 | 6 | 11511 | 36184467 | Production | 36168124 | 36168128 | 183305 |
| 134 | 6 | 11511 | 36184467 | Production | 36168124 | 36168128 | 186440 |
| 135 | 6 | 11511 | 36184467 | Production | 36168592 | 36168596 | 182749 |
| 136 | 6 | 11511 | 36184467 | Production | 36168592 | 36168596 | 186494 |
| 137 | 6 | 11511 | 36184467 | Production | 36168720 | 36168724 | 182760 |
| 138 | 6 | 11511 | 36184467 | Production | 36168720 | 36168724 | 186560 |
| 139 | 6 | 11511 | 36184467 | Production | 36169712 | 36169716 | 187059 |
| 140 | 6 | 11511 | 36184467 | Production | 36169792 | 36169796 | 186511 |
| 141 | 6 | 11511 | 36184467 | Milk | 36170113 | 36170117 | 140218 |
| 142 | 6 | 11511 | 36184467 | Production | 36170133 | 36170137 | 186638 |
| 143 | 6 | 11511 | 36184467 | Production | 36170199 | 36170203 | 187467 |
| 144 | 6 | 11511 | 36184467 | Production | 36171684 | 36171688 | 188463 |
| 145 | 6 | 11511 | 36184467 | Production | 36172370 | 36172374 | 187237 |
| 146 | 6 | 11511 | 36184467 | Milk | 36172376 | 36172380 | 246431 |
| 147 | 6 | 11511 | 36184467 | Production | 36177199 | 36177203 | 183152 |
| 148 | 6 | 11511 | 36184467 | Production | 36177199 | 36177203 | 188011 |
| 149 | 6 | 11511 | 36184467 | Production | 36180643 | 36180647 | 187727 |
| 150 | 6 | 11511 | 36184467 | Production | 36181019 | 36181023 | 187465 |
| 151 | 6 | 11511 | 36184467 | Production | 36182792 | 36182796 | 187002 |
| 152 | 6 | 11511 | 36184467 | Production | 36183911 | 36183915 | 186509 |
| 153 | 6 | 11511 | 36184467 | Milk | 36185332 | 36185336 | 250783 |
| 154 | 6 | 11511 | 36184467 | Milk | 36186103 | 36186107 | 103888 |
| 155 | 6 | 11511 | 36184467 | Milk | 36186103 | 36186107 | 104752 |
| 156 | 6 | 11511 | 36184467 | Production | 36186581 | 36186585 | 188641 |
| 157 | 6 | 11511 | 36184467 | Production | 36186688 | 36186692 | 189503 |
| 158 | 6 | 11511 | 36184467 | Production | 36187039 | 36187043 | 188875 |
| 159 | 6 | 11511 | 36184467 | Production | 36189076 | 36189080 | 188768 |
| 160 | 6 | 11511 | 36184467 | Production | 36191254 | 36191258 | 187381 |
| 161 | 6 | 11511 | 36184467 | Production | 36198700 | 36198704 | 186480 |
| 162 | 6 | 11511 | 36184467 | Production | 36203596 | 36203600 | 187794 |
| 163 | 6 | 11511 | 36184467 | Milk | 36204034 | 36204038 | 138157 |
| 164 | 6 | 11511 | 36184467 | Production | 36204034 | 36204038 | 186572 |
| 165 | 6 | 11511 | 36184467 | Milk | 36205214 | 36205218 | 15002 |
| 166 | 6 | 11511 | 36184467 | Production | 36206851 | 36206855 | 188861 |
| 167 | 6 | 11511 | 36184467 | Production | 36210548 | 36210552 | 187623 |
| 168 | 6 | 11511 | 36184467 | Production | 36211595 | 36211599 | 187645 |
| 169 | 6 | 11511 | 36184467 | Production | 36212184 | 36212188 | 189171 |
| 170 | 6 | 11511 | 36184467 | Production | 36212912 | 36212916 | 187151 |
| 171 | 6 | 11511 | 36184467 | Production | 36216422 | 36216426 | 187483 |
| 172 | 6 | 11511 | 36184467 | Milk | 36218000 | 36218004 | 103890 |
| 173 | 6 | 11511 | 36184467 | Milk | 36218000 | 36218004 | 104754 |
| 174 | 6 | 11511 | 36184467 | Milk | 36219842 | 36219846 | 105620 |
| 175 | 6 | 11511 | 36184467 | Production | 36219842 | 36219846 | 186455 |
| 176 | 6 | 11511 | 36184467 | Production | 36221082 | 36221086 | 187299 |
| 177 | 6 | 11511 | 36184467 | Production | 36221438 | 36221442 | 187378 |
| 178 | 6 | 11511 | 36184467 | Production | 36225616 | 36225620 | 186759 |
| 179 | 6 | 11511 | 36184467 | Production | 36226441 | 36226445 | 186605 |
| 180 | 6 | 11511 | 36184467 | Production | 36226963 | 36226967 | 23701 |
| 181 | 6 | 11511 | 36184467 | Production | 36226963 | 36226967 | 67245 |
| 182 | 6 | 11511 | 36184467 | Production | 36226963 | 36226967 | 67246 |
| 183 | 6 | 11511 | 36184467 | Production | 36226963 | 36226967 | 67247 |
| 184 | 6 | 11511 | 36184467 | Milk | 36226963 | 36226967 | 105509 |
| 185 | 6 | 11511 | 36184467 | Production | 36226963 | 36226967 | 164069 |
| 186 | 6 | 11511 | 36184467 | Production | 36226963 | 36226967 | 164370 |
| 187 | 6 | 11511 | 36184467 | Production | 36226963 | 36226967 | 186841 |
| 188 | 6 | 11511 | 36184467 | Production | 36227839 | 36227843 | 182676 |
| 189 | 6 | 11511 | 36184467 | Production | 36227839 | 36227843 | 188422 |
| 190 | 6 | 11511 | 36184467 | Production | 36232422 | 36232426 | 187967 |
| 191 | 6 | 11511 | 36184467 | Production | 36232742 | 36232746 | 187367 |
| 192 | 6 | 11511 | 36184467 | Production | 36233671 | 36233675 | 188839 |
| 193 | 14 | 25167 | 15929822 | Milk | 15895119 | 15895123 | 25606 |
| 194 | 14 | 25167 | 15929822 | Milk | 15895119 | 15895123 | 26355 |
| 195 | 14 | 25167 | 15929822 | Meat_and_Carcass | 15895119 | 15895123 | 36989 |
| 196 | 14 | 25167 | 15929822 | Milk | 15904094 | 15904098 | 104606 |
| 197 | 14 | 25167 | 15929822 | Milk | 15905538 | 15905542 | 104607 |
| 198 | 14 | 25167 | 15929822 | Exterior | 15929820 | 15929824 | 220989 |
| 199 | 14 | 25167 | 15929822 | Production | 15964283 | 15964287 | 122755 |
| 200 | 14 | 25167 | 15929822 | Production | 15964283 | 15964287 | 123539 |
| 201 | 14 | 25167 | 15929822 | Production | 15965017 | 15965021 | 122756 |
| 202 | 14 | 25167 | 15929822 | Production | 15965017 | 15965021 | 123540 |
| 203 | 14 | 25167 | 15929822 | Production | 15967515 | 15967519 | 122757 |
| 204 | 14 | 25167 | 15929822 | Production | 15967515 | 15967519 | 123541 |
| 205 | 14 | 25167 | 15929822 | Production | 15969384 | 15969388 | 122758 |
| 206 | 14 | 25167 | 15929822 | Production | 15969384 | 15969388 | 123542 |
| 207 | 14 | 25167 | 15929822 | Production | 15972216 | 15972220 | 122759 |
| 208 | 14 | 25167 | 15929822 | Production | 15972216 | 15972220 | 123543 |
| 209 | 14 | 25167 | 15929822 | Production | 15977822 | 15977826 | 122760 |
| 210 | 14 | 25167 | 15929822 | Production | 15977822 | 15977826 | 123544 |
| 211 | 15 | 26782 | 33066384 | Production | 33066382 | 33066386 | 68791 |
| 212 | 15 | 26782 | 33066384 | Meat_and_Carcass | 33107568 | 33107572 | 228743 |
| 213 | 15 | 27199 | 57930281 | Milk | 57892792 | 57892796 | 242339 |
| 214 | 15 | 27199 | 57930281 | Milk | 57914040 | 57914044 | 248429 |
| 215 | 15 | 27199 | 57930281 | Milk | 57926776 | 57926780 | 242396 |
| 216 | 15 | 27199 | 57930281 | Milk | 57926776 | 57926780 | 248428 |
| 217 | 15 | 27199 | 57930281 | Milk | 57949776 | 57949780 | 242391 |
| 218 | 17 | 29742 | 46487068 | Milk | 46449873 | 46449877 | 162016 |
| 219 | 17 | 29742 | 46487068 | Milk | 46449873 | 46449877 | 163641 |
| 220 | 17 | 29742 | 46487068 | Milk | 46452018 | 46452022 | 162017 |
| 221 | 17 | 29742 | 46487068 | Milk | 46452018 | 46452022 | 163642 |
| 222 | 17 | 29742 | 46487068 | Milk | 46455275 | 46455279 | 163230 |
| 223 | 17 | 29742 | 46487068 | Milk | 46455275 | 46455279 | 163643 |
| 224 | 17 | 29742 | 46487068 | Milk | 46457135 | 46457139 | 163231 |
| 225 | 17 | 29742 | 46487068 | Milk | 46457135 | 46457139 | 163644 |
| 226 | 17 | 29742 | 46487068 | Milk | 46460742 | 46460746 | 163159 |
| 227 | 17 | 29742 | 46487068 | Milk | 46460742 | 46460746 | 163583 |
| 228 | 17 | 29742 | 46487068 | Milk | 46463937 | 46463941 | 163232 |
| 229 | 17 | 29742 | 46487068 | Milk | 46463937 | 46463941 | 163645 |
| 230 | 17 | 29742 | 46487068 | Milk | 46474328 | 46474332 | 162015 |
| 231 | 17 | 29742 | 46487068 | Milk | 46474328 | 46474332 | 163640 |
| 232 | 17 | 29742 | 46487068 | Milk | 46474997 | 46475001 | 162014 |
| 233 | 17 | 29742 | 46487068 | Milk | 46474997 | 46475001 | 163639 |
| 234 | 17 | 29742 | 46487068 | Milk | 46476243 | 46476247 | 162013 |
| 235 | 17 | 29742 | 46487068 | Milk | 46476243 | 46476247 | 163638 |
| 236 | 17 | 29742 | 46487068 | Milk | 46476816 | 46476820 | 162012 |
| 237 | 17 | 29742 | 46487068 | Milk | 46476816 | 46476820 | 163637 |
| 238 | 17 | 29742 | 46487068 | Milk | 46480499 | 46480503 | 161948 |
| 239 | 17 | 29742 | 46487068 | Milk | 46480499 | 46480503 | 162037 |
| 240 | 17 | 29742 | 46487068 | Milk | 46481110 | 46481114 | 161941 |
| 241 | 17 | 29742 | 46487068 | Milk | 46481110 | 46481114 | 162030 |
| 242 | 17 | 29742 | 46487068 | Milk | 46481709 | 46481713 | 162019 |
| 243 | 17 | 29742 | 46487068 | Milk | 46481709 | 46481713 | 162100 |
| 244 | 17 | 29742 | 46487068 | Milk | 46482674 | 46482678 | 162020 |
| 245 | 17 | 29742 | 46487068 | Milk | 46482674 | 46482678 | 162101 |
| 246 | 17 | 29742 | 46487068 | Milk | 46483484 | 46483488 | 162021 |
| 247 | 17 | 29742 | 46487068 | Milk | 46483484 | 46483488 | 162102 |
| 248 | 17 | 29742 | 46487068 | Milk | 46529284 | 46529288 | 163162 |
| 249 | 17 | 29742 | 46487068 | Milk | 46529284 | 46529288 | 163584 |
| 250 | 17 | 29772 | 47884497 | Milk | 47837023 | 47837027 | 162764 |
| 251 | 17 | 29772 | 47884497 | Milk | 47844960 | 47844964 | 162876 |
| 252 | 17 | 29772 | 47884497 | Milk | 47851806 | 47851810 | 162921 |
| 253 | 17 | 29772 | 47884497 | Milk | 47855252 | 47855256 | 162648 |
| 254 | 17 | 29772 | 47884497 | Production | 47856400 | 47856404 | 23869 |
| 255 | 17 | 29772 | 47884497 | Production | 47856400 | 47856404 | 68904 |
| 256 | 17 | 29772 | 47884497 | Exterior | 47856400 | 47856404 | 102064 |
| 257 | 17 | 29772 | 47884497 | Exterior | 47884495 | 47884499 | 102065 |
| 258 | 17 | 29772 | 47884497 | Milk | 47924476 | 47924480 | 163030 |
| 259 | 17 | 29772 | 47884497 | Milk | 47924476 | 47924480 | 163517 |
| 260 | 17 | 29772 | 47884497 | Milk | 47926919 | 47926923 | 162858 |
| 261 | 17 | 29772 | 47884497 | Milk | 47926919 | 47926923 | 163422 |
| 262 | 17 | 29784 | 48800954 | Milk | 48759668 | 48759672 | 63966 |
| 263 | 20 | 33665 | 65351653 | Milk | 65321359 | 65321363 | 176120 |
| 264 | 20 | 33665 | 65351653 | Milk | 65350145 | 65350149 | 69962 |
| 265 | 20 | 33665 | 65351653 | Milk | 65352215 | 65352219 | 69963 |
| 266 | 20 | 33665 | 65351653 | Milk | 65357974 | 65357978 | 69964 |
| 267 | 20 | 33665 | 65351653 | Milk | 65363596 | 65363600 | 69626 |
| 268 | 20 | 33665 | 65351653 | Health | 65367232 | 65367236 | 167647 |
| 269 | 21 | 33836 | 4055731 | Reproduction | 4074797 | 4074801 | 145524 |
| 270 | 21 | 33836 | 4055731 | Reproduction | 4079204 | 4079208 | 145458 |
| 271 | 21 | 33836 | 4055731 | Reproduction | 4079247 | 4079251 | 145445 |
| 272 | 21 | 33836 | 4055731 | Reproduction | 4085667 | 4085671 | 145389 |
| 273 | 21 | 33836 | 4055731 | Reproduction | 4085667 | 4085671 | 146710 |
| 274 | 21 | 33836 | 4055731 | Reproduction | 4085817 | 4085821 | 146703 |
| 275 | 23 | 36688 | 44822126 | Milk | 44787659 | 44787663 | 254748 |
| 276 | 23 | 36688 | 44822126 | Milk | 44789048 | 44789052 | 254706 |
| 277 | 23 | 36688 | 44822126 | Meat_and_Carcass | 44807487 | 44807491 | 226965 |
| 278 | 23 | 36688 | 44822126 | Meat_and_Carcass | 44807487 | 44807491 | 229458 |
| 279 | 23 | 36688 | 44822126 | Meat_and_Carcass | 44807487 | 44807491 | 234754 |
| 280 | 23 | 36688 | 44822126 | Milk | 44808987 | 44808991 | 254767 |
| 281 | 23 | 36688 | 44822126 | Milk | 44830780 | 44830784 | 254833 |
| 282 | 23 | 36688 | 44822126 | Milk | 44834127 | 44834131 | 254725 |
| 283 | 23 | 36688 | 44822126 | Milk | 44845256 | 44845260 | 254688 |
| 284 | 23 | 36688 | 44822126 | Milk | 44848146 | 44848150 | 254689 |
| 285 | 23 | 36688 | 44822126 | Milk | 44859653 | 44859657 | 254769 |
| 286 | 24 | 36876 | 857728 | Exterior | 857726 | 857730 | 66121 |
| 287 | 24 | 36876 | 857728 | Reproduction | 885598 | 885602 | 106889 |
| 288 | 28 | 40795 | 20016672 | Milk | 20016670 | 20016674 | 112226 |
| 289 | 28 | 40795 | 20016672 | Milk | 20016670 | 20016674 | 118869 |
| 290 | 28 | 40795 | 20016672 | Milk | 20017763 | 20017767 | 112227 |
| 291 | 28 | 40795 | 20016672 | Milk | 20017763 | 20017767 | 118870 |
| 292 | 28 | 40795 | 20016672 | Milk | 20045011 | 20045015 | 118664 |
| 293 | 28 | 40795 | 20016672 | Milk | 20060184 | 20060188 | 118665 |
| 294 | 28 | 41057 | 35807366 | Reproduction | 35807364 | 35807368 | 138601 |
| 295 | 28 | 41057 | 35807366 | Reproduction | 35807364 | 35807368 | 138602 |
| 296 | 29 | 41785 | 33039863 | Production | 33039861 | 33039865 | 69450 |
| 297 | 29 | 41785 | 33039863 | Production | 33039861 | 33039865 | 69451 |
QTL type
#plotting the percentage of each QTL class annoatted
oldpar <- par(mar=c(1,15,0.5,1))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1.5)
Production QTLs
#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Production")
Reproduction QTLs
#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Reproduction")
Milk QTLs
#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Milk")
Meat and Carcass QTLs
#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class=""Meat_and_Carcass"")
Health QTLs
#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Health")
Exterior QTLs
#Setting margin parameter to better fit the axis labels
oldpar<-par(mar=c(5,20,1,1))
#plotting the percentage of each trait annoatted as a Reproduction QTL
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Exterior")
#QTL enrichment analysis
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "Name",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]
write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_enrich_qtl_genome_name.csv")
out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "QTL_type",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/GWAS_BLUPF90/out_enrich_qtl_genome_type.csv")
#Plots
#Name
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")
#Type
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")
QTL Enrichment outcomes
Enrichment by name (enrichment analysis will be performed for each trait individually)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval | QTL_type |
|---|---|---|---|---|---|---|---|---|
| 27 | Metabolic body weight | 83 | 4039 | 292 | 163224 | 0.0000000 | 0.0000000 | Production |
| 29 | Milk butyric acid content | 22 | 828 | 292 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 30 | Milk caproic acid content | 18 | 675 | 292 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 12 | Dry matter intake | 17 | 2186 | 292 | 163224 | 0.0000006 | 0.0000090 | Production |
| 2 | Aggressive behavior | 2 | 8 | 292 | 163224 | 0.0000887 | 0.0008719 | Exterior |
| 25 | Length of productive life | 13 | 2004 | 292 | 163224 | 0.0000815 | 0.0008719 | Production |
| 35 | Milk pentadecylic acid content | 5 | 280 | 292 | 163224 | 0.0001640 | 0.0013823 | Milk |
| 33 | Milk iron content | 4 | 217 | 292 | 163224 | 0.0006685 | 0.0049302 | Milk |
| 15 | Dystocia | 2 | 31 | 292 | 163224 | 0.0014331 | 0.0090411 | Reproduction |
| 16 | Fecal larva count | 3 | 125 | 292 | 163224 | 0.0015324 | 0.0090411 | Health |
| 4 | Body weight | 16 | 4289 | 292 | 163224 | 0.0050041 | 0.0268400 | Production |
| 14 | Duration of inactivity during open field test | 1 | 9 | 292 | 163224 | 0.0159862 | 0.0785990 | Exterior |
| 13 | Duration of exploration during novel object test | 1 | 16 | 292 | 163224 | 0.0282437 | 0.1281828 | Exterior |
| 5 | Body weight gain | 6 | 1354 | 292 | 163224 | 0.0362258 | 0.1526660 | Production |
| 3 | Anti-Müllerian hormone level | 1 | 26 | 292 | 163224 | 0.0454908 | 0.1789305 | Health |
| 46 | Polyunsaturated fatty acid content | 1 | 28 | 292 | 163224 | 0.0489034 | 0.1803314 | Meat and Carcass |
| 45 | Palmitoleic acid content | 1 | 46 | 292 | 163224 | 0.0790752 | 0.2744374 | Meat and Carcass |
| 42 | Muscle sodium content | 1 | 56 | 292 | 163224 | 0.0954231 | 0.3127758 | Meat and Carcass |
| 28 | Milk alpha-S1-casein percentage | 1 | 65 | 292 | 163224 | 0.1098888 | 0.3382449 | Milk |
| 43 | Myristoleic acid content | 1 | 68 | 292 | 163224 | 0.1146593 | 0.3382449 | Meat and Carcass |
| 39 | Milk unglycosylated kappa-casein percentage | 7 | 2351 | 292 | 163224 | 0.1315212 | 0.3695119 | Milk |
| 21 | Interdigital hyperplasia | 1 | 93 | 292 | 163224 | 0.1534352 | 0.4052354 | Exterior |
| 41 | Multiple birth | 1 | 96 | 292 | 163224 | 0.1579731 | 0.4052354 | Reproduction |
| 19 | Gestation length | 2 | 636 | 292 | 163224 | 0.3149170 | 0.7146195 | Reproduction |
| 23 | Interval to first estrus after calving | 3 | 1053 | 292 | 163224 | 0.2917668 | 0.7146195 | Reproduction |
| 36 | Milk protein percentage | 18 | 8803 | 292 | 163224 | 0.3139794 | 0.7146195 | Milk |
| 8 | Calving ease | 6 | 3819 | 292 | 163224 | 0.6804471 | 0.9778366 | Reproduction |
| 10 | Conception rate | 2 | 1255 | 292 | 163224 | 0.6577041 | 0.9778366 | Reproduction |
| 17 | Feet and leg conformation | 1 | 627 | 292 | 163224 | 0.6752950 | 0.9778366 | Exterior |
| 18 | Foot angle | 1 | 672 | 292 | 163224 | 0.7005282 | 0.9778366 | Exterior |
| 22 | Interval from first to last insemination | 1 | 445 | 292 | 163224 | 0.5497190 | 0.9778366 | Reproduction |
| 24 | Lean meat yield | 1 | 621 | 292 | 163224 | 0.6717742 | 0.9778366 | Meat and Carcass |
| 38 | Milk riboflavin content | 1 | 509 | 292 | 163224 | 0.5986072 | 0.9778366 | Milk |
| 48 | PTA type | 1 | 627 | 292 | 163224 | 0.6752950 | 0.9778366 | Production |
| 49 | Rear leg placement - side view | 1 | 430 | 292 | 163224 | 0.5374280 | 0.9778366 | Exterior |
| 50 | Rump width | 1 | 526 | 292 | 163224 | 0.6106790 | 0.9778366 | Production |
| 52 | Somatic cell score | 2 | 1122 | 292 | 163224 | 0.5971200 | 0.9778366 | Health |
| 53 | Stillbirth | 2 | 1363 | 292 | 163224 | 0.7013705 | 0.9778366 | Reproduction |
| 54 | Strength | 1 | 664 | 292 | 163224 | 0.6961897 | 0.9778366 | Exterior |
| 55 | Subcutaneous fat thickness | 1 | 331 | 292 | 163224 | 0.4474847 | 0.9778366 | Meat and Carcass |
| 57 | Udder attachment | 1 | 655 | 292 | 163224 | 0.6912339 | 0.9778366 | Exterior |
| 58 | Udder depth | 1 | 695 | 292 | 163224 | 0.7126605 | 0.9778366 | Exterior |
| 59 | Udder height | 1 | 504 | 292 | 163224 | 0.5949862 | 0.9778366 | Exterior |
| 6 | Bovine respiratory disease susceptibility | 1 | 789 | 292 | 163224 | 0.7573589 | 0.9935536 | Health |
| 20 | Inseminations per conception | 1 | 790 | 292 | 163224 | 0.7577951 | 0.9935536 | Reproduction |
| 1 | Age at puberty | 1 | 8222 | 292 | 163224 | 0.9999997 | 0.9999997 | Reproduction |
| 7 | Bovine tuberculosis susceptibility | 1 | 1155 | 292 | 163224 | 0.8744991 | 0.9999997 | Health |
| 9 | Carcass weight | 1 | 2020 | 292 | 163224 | 0.9737344 | 0.9999997 | Meat and Carcass |
| 11 | Connective tissue amount | 1 | 3142 | 292 | 163224 | 0.9965892 | 0.9999997 | Meat and Carcass |
| 26 | Marbling score | 1 | 1817 | 292 | 163224 | 0.9620577 | 0.9999997 | Meat and Carcass |
| 31 | Milk fat percentage | 9 | 10941 | 292 | 163224 | 0.9979006 | 0.9999997 | Milk |
| 32 | Milk fat yield | 2 | 8220 | 292 | 163224 | 0.9999954 | 0.9999997 | Milk |
| 34 | Milk kappa-casein percentage | 5 | 4499 | 292 | 163224 | 0.9064431 | 0.9999997 | Milk |
| 37 | Milk protein yield | 1 | 3093 | 292 | 163224 | 0.9962701 | 0.9999997 | Milk |
| 40 | Milk yield | 4 | 6432 | 292 | 163224 | 0.9970834 | 0.9999997 | Milk |
| 44 | Net merit | 1 | 903 | 292 | 163224 | 0.8023701 | 0.9999997 | Production |
| 47 | Pregnancy rate | 1 | 944 | 292 | 163224 | 0.8164356 | 0.9999997 | Reproduction |
| 51 | Shear force | 2 | 2954 | 292 | 163224 | 0.9692776 | 0.9999997 | Meat and Carcass |
| 56 | Tenderness score | 4 | 3483 | 292 | 163224 | 0.8712212 | 0.9999997 | Meat and Carcass |
Enrichment by QTL_type (enrichment processes performed for the QTL classes)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval |
|---|---|---|---|---|---|---|---|
| 1 | Exterior | 12 | 9077 | 292 | 163224 | 0.8914062 | 1 |
| 2 | Health | 8 | 5889 | 292 | 163224 | 0.8293292 | 1 |
| 3 | Meat and Carcass | 15 | 18258 | 292 | 163224 | 0.9998999 | 1 |
| 4 | Milk | 97 | 75352 | 292 | 163224 | 0.9999974 | 1 |
| 5 | Production | 138 | 19640 | 292 | 163224 | 0.0000000 | 0 |
| 6 | Reproduction | 22 | 35008 | 292 | 163224 | 1.0000000 | 1 |
From the online version of GPROFILER i got the following results.
Legend
rs110079750
rs109276211
rs109631116
rs42029843
rs110428837
rs110081798
rs110160157
rs110038841
rs109868537
rs110565206
rs42751504
rs110121846
rs110935391
rs109584097
rs41585925
rs110506037
rs109273103
rs109575643
rs43305418
rs43377276
rs43691687
rs41764450
rs42305073 No
rs42462935 No
rs42752353
rs81154019 No
rs41645754
rs41566051
rs41636137
rs41615935
rs29026778
rs29013980
rs29023486
rs29027498
rs29017061
rs29018741
rs29013301
rs29012492
It is interesting that 3 significant SNPs falled in the same gene CSMD2 on chromosome 3 rs109868537 rs42751504 rs42752353
For the gene CNTNAP2 on chromosome 4 also have 2 significant SNPs rs41566051 rs29012492
We have 2 different output files
In this file we can find the heritabilit (SD) and for instance the convergence (similarity)
In our example:
EFFECT 3: Animal random effect, has one for each animal and it is the EBV or GEBV.
We’LL estimate the GRM on GCTA fro all 252 animals.
First I will need to build a
Now we’ll estimate the Variance Components for all 252 animals on BLUPF90+ With the Variance Components Estimations we’ll perform a GWAS on BLUPF90+ only for the animals with Extreme Phenotypes.
Now we are going to apply a correction for multiple testing modifying the Bonferroni test adjusting not for the total number of SNPs but for the number of the independent segments in the genome.
Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")
library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
summarise(total_length = sum(`Seq length`)) %>%
pull(total_length)
# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8
# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)
NeL <- Ne*L_M
# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)
gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/GWAS_result_NO_CONSTRAIN.loco.mlma",
head=T, stringsAsFactors = F)
gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}
bonf<- -log10(0.15/Me)
library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20,
cex=1, lwd=1, ylab="", xlab="Chromossomes",
main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topright", col="red2", lwd=2, c("Bonferroni corr. for ind. segments"), bty="n")
library(tidyverse)
SNP_sig_bonf_ind_seg <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf_ind_seg, file = "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/SNP_sig15_Bonf_ind_seg.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
write.csv(SNP_sig_bonf_ind_seg, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/SNP_sig15_Bonf_ind_seg.txt")
The SNPs below are the significant after the correction above
| X | Chr | SNP | bp | A1 | A2 | Freq | b | se | p | logP |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 3 | BTB-00148619 | 105840426 | A | C | 0.321429 | 410.739 | 104.530 | 0.0000852 | 4.069736 |
| 2 | 3 | ARS-BFGL-NGS-118207 | 110866523 | A | C | 0.421429 | 402.730 | 103.634 | 0.0001019 | 3.991928 |
| 3 | 22 | ARS-BFGL-NGS-23411 | 5134078 | A | C | 0.214286 | 415.637 | 109.100 | 0.0001391 | 3.856526 |
Bellow we inserted the rsID searched at SNPCHIMP
| X | Chr | rsID | SNP | bp | A1 | A2 | Freq | b | se | p | logP |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 3 | rs43357861 | BTB-00148619 | 105840426 | A | C | 0.321429 | 410.739 | 104.530 | 0.0000852 | 4.069736 |
| 2 | 3 | rs110081798 | ARS-BFGL-NGS-118207 | 110866523 | A | C | 0.421429 | 402.730 | 103.634 | 0.0001019 | 3.991928 |
| 3 | 22 | rs110031217 | ARS-BFGL-NGS-23411 | 5134078 | A | C | 0.214286 | 415.637 | 109.100 | 0.0001391 | 3.856526 |
# GALLO
#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")
#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")
#import MARKER files = the GWAS output
gwas <- read.csv(file = "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/SNP_sig15_Bonf_ind_seg.csv")
# Assuming "gwas" is your dataframe
gwas <- subset(gwas, select = c(Chr, SNP, bp))
colnames(gwas) <- c("CHR","SNP", "BP")
#FINDING GENES AND QTLs ARROUND THE MARKER
#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2,
marker_file= gwas,
method = "gene",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_genes_extr_15.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_genes_extr_15.csv")
#FINDING QTLs
out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2,
marker_file= gwas,
method = "qtl",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_qtl_extr_15.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_qtl_extr_15.csv")
The GALLO output are bellow:
For GENES
| X | CHR | SNP | BP | chr | start_pos | end_pos | width | strand | gene_id | gene_name | gene_biotype |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 3 | BTB-00148619 | 105840426 | 3 | 105807211 | 105810027 | 2817 | + | ENSBTAG00000020622 | COL9A2 | lncRNA |
| 2 | 3 | BTB-00148619 | 105840426 | 3 | 105824376 | 105867157 | 42782 | - | ENSBTAG00000044178 | ZMPSTE24 | protein_coding |
| 3 | 3 | BTB-00148619 | 105840426 | 3 | 105884724 | 105887360 | 2637 | - | ENSBTAG00000019698 | TMCO2 | protein_coding |
| 4 | 3 | ARS-BFGL-NGS-118207 | 110866523 | 3 | 110769753 | 110827938 | 58186 | + | ENSBTAG00000013867 | DLGAP3 | protein_coding |
| 5 | 3 | ARS-BFGL-NGS-118207 | 110866523 | 3 | 110835276 | 110839536 | 4261 | + | ENSBTAG00000014235 | SMIM12 | protein_coding |
| 6 | 3 | ARS-BFGL-NGS-118207 | 110866523 | 3 | 110888198 | 110890904 | 2707 | - | ENSBTAG00000013881 | GJA4 | protein_coding |
| 7 | 3 | ARS-BFGL-NGS-118207 | 110866523 | 3 | 110900183 | 110905849 | 5667 | - | ENSBTAG00000012584 | GJB3 | protein_coding |
| 8 | 22 | ARS-BFGL-NGS-23411 | 5134078 | 22 | 5091037 | 5184321 | 93285 | + | ENSBTAG00000019832 | TGFBR2 | protein_coding |
FOR QTLs
| X | CHR | SNP | BP | QTL_type | start_pos | end_pos | QTL_ID |
|---|---|---|---|---|---|---|---|
| 1 | 3 | BTB-00148619 | 105840426 | Production | 105835011 | 105835015 | 287197 |
| 2 | 3 | ARS-BFGL-NGS-118207 | 110866523 | Meat_and_Carcass | 110866521 | 110866525 | 152332 |
| 3 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5109498 | 5109502 | 151609 |
| 4 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5134076 | 5134080 | 151620 |
| 5 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5173197 | 5173201 | 151601 |
#QTL enrichment analysis
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "Name",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]
write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_enrich_qtl_genome_name_extr_15.csv")
out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "QTL_type",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_enrich_qtl_genome_type_extr_15.csv")
#Plots
#Name
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")
#Type
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")
QTL Enrichment outcomes
Enrichment by name (enrichment analysis will be performed for each trait individually)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval | QTL_type |
|---|---|---|---|---|---|---|---|---|
| 2 | Muscle carnosine content | 3 | 67 | 5 | 163224 | 0.0000000 | 0.0000000 | Meat and Carcass |
| 3 | Polyunsaturated fatty acid content | 1 | 28 | 5 | 163224 | 0.0008574 | 0.0012861 | Meat and Carcass |
| 1 | Body depth | 1 | 616 | 5 | 163224 | 0.0187281 | 0.0187281 | Production |
Enrichment by QTL_type (enrichment processes performed for the QTL classes)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval |
|---|---|---|---|---|---|---|---|
| 1 | Meat and Carcass | 4 | 18258 | 5 | 163224 | 0.0007125 | 0.0014251 |
| 2 | Production | 1 | 19640 | 5 | 163224 | 0.4732476 | 0.4732476 |
From the online version of GPROFILER i got the following results.
Legend
pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv", header = T)
ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)
# Create data_final$cdn_id by matching IDs with elora_id
data_final$cdn_id <- ids_eq$cdn_id[match(data_final$ID, ids_eq$elora_id)]
fixeff <- data_final[,c("ID", "BIRTH_YEAR", "Sampling_date", "cdn_id")]
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id, ]
fixeff$FID <- "HO"
fixeff <- fixeff[, c("FID", "cdn_id", "BIRTH_YEAR", "Sampling_date")]
identical(fixeff$cdn_id, pheno$cdn_id)
write.table(fixeff, "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/fixeff.txt", quote = F, row.names = F, col.names = T)
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/GWAS_result
PHENO=/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt
FIXEFF=/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/fixeff.txt
/home/local/bin/gcta \
--bfile ${DATA} \
--mlma-loco \
--pheno ${PHENO} \
--qcovar ${FIXEFF} \
--autosome-num 29 \
--autosome \
--out ${RESULT}
Now we are going to apply a correction for multiple testing modifying the Bonferroni test adjusting not for the total number of SNPs but for the number of the independent segments in the genome.
{r eval=FALSE}
Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")
library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
summarise(total_length = sum(`Seq length`)) %>%
pull(total_length)
# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8
# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)
NeL <- Ne*L_M
# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)
gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/GWAS_result.loco.mlma",
head=T, stringsAsFactors = F)
gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}
bonf<- -log10(0.05/Me)
library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20,
cex=1, lwd=1, ylab="", xlab="Chromossomes",
main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topleft", col="red2", lwd=2, c("Bonferroni corr. for ind. segments"), bty="n")
library(tidyverse)
SNP_sig_bonf_ind_seg <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf_ind_seg, file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/SNP_sig_Bonf_ind_seg.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
write.csv(SNP_sig_bonf_ind_seg, "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/SNP_sig_Bonf_ind_seg2.csv")
The SNPs below are the significant after the correction above
| X | Chr | SNP | bp | A1 | A2 | Freq | b | se | p | logP |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 22 | ARS-BFGL-NGS-23411 | 5134078 | A | C | 0.1872510 | 196.185 | 43.9934 | 8.20e-06 | 5.085250 |
| 2 | 22 | ARS-BFGL-NGS-26419 | 46052082 | A | C | 0.0816733 | 295.245 | 66.9504 | 1.03e-05 | 4.985404 |
| 3 | 26 | BTB-00926636 | 17857480 | C | A | 0.0258964 | 491.154 | 112.4840 | 1.26e-05 | 4.898635 |
| 4 | 26 | BTA-62125-no-rs | 17899619 | C | A | 0.0258964 | 491.154 | 112.4840 | 1.26e-05 | 4.898635 |
| 5 | 28 | BTA-99379-no-rs | 41666186 | A | C | 0.0498008 | 394.376 | 83.4532 | 2.30e-06 | 5.639587 |
Bellow we inserted the rsID searched at SNPCHIMP
| X | Chr | rsID | SNP | bp | A1 | A2 | Freq | b | se | p | logP |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 22 | rs110031217 | ARS-BFGL-NGS-23411 | 5134078 | A | C | 0.1872510 | 196.185 | 43.9934 | 8.20e-06 | 5.085250 |
| 2 | 22 | rs110991998 | ARS-BFGL-NGS-26419 | 46052082 | A | C | 0.0816733 | 295.245 | 66.9504 | 1.03e-05 | 4.985404 |
| 3 | 26 | rs42089058 | BTB-00926636 | 17857480 | C | A | 0.0258964 | 491.154 | 112.4840 | 1.26e-05 | 4.898635 |
| 4 | 26 | rs41644634 | BTA-62125-no-rs | 17899619 | C | A | 0.0258964 | 491.154 | 112.4840 | 1.26e-05 | 4.898635 |
| 5 | 28 | rs41567074 | BTA-99379-no-rs | 41666186 | A | C | 0.0498008 | 394.376 | 83.4532 | 2.30e-06 | 5.639587 |
gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/GWAS_result.loco.mlma",
head=T, stringsAsFactors = F)
# Prepare the dataset
don <- gwas %>%
group_by(Chr) %>%
summarise(chr_len = max(bp)) %>%
mutate(tot = cumsum(as.numeric(chr_len)) - as.numeric(chr_len)) %>%
select(-chr_len) %>%
left_join(gwas, ., by = c("Chr" = "Chr")) %>%
arrange(Chr, bp) %>%
mutate(BPcum = as.numeric(bp) + tot) %>%
mutate(is_highlight = ifelse(logP > bonf, "yes", "no")) %>%
mutate(is_annotate = ifelse(logP > bonf, "yes", "no"))
# Remove rows with missing BPcum values
don <- don %>% filter(!is.na(BPcum))
# Prepare X axis
axisdf <- don %>%
group_by(Chr) %>%
summarize(center = (max(BPcum) + min(BPcum)) / 2)
# Find the maximum logP value for setting y-axis limits
max_logP <- max(don$logP, na.rm = TRUE)
# Make the plot
ggplot(don, aes(x = BPcum, y = logP)) +
geom_point(aes(color = as.factor(Chr)), alpha = 0.8, size = 1.3) +
scale_color_manual(values = rep(c("grey", "skyblue"), 22)) +
scale_x_continuous(label = axisdf$Chr, breaks = axisdf$center) +
scale_y_continuous(expand = c(0, 0), limits = c(0, max_logP + 1)) + # Set y-axis limit
geom_point(data = subset(don, is_highlight == "yes"), color = "orange", size = 2) +
geom_text_repel(data = subset(don, is_annotate == "yes"), aes(label = SNP), size = 2) +
theme_bw() +
theme(
legend.position = "none",
panel.border = element_blank(),
panel.grid.major.x = element_blank(),
panel.grid.minor.x = element_blank()
) +
labs(title = "GWAS Cortisol", x = "Chromosomes", y = "-log10(p)")
After insert all rsID on VEP the summary is shown below:
As can be seen, the VEP only analysed 14 variants
Bellow I’m gonna show the “genome view” for each SNP (variant) recovered from VEP:
rs110031217
rs110991998
rs42089058
rs41644634
rs41567074
ps. the unique reference genome available in the VEP is the ARS-UCD1.3, which is not a problem once it is working with the rsID, which for sure is our variant.
# GALLO
#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")
#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")
#import MARKER files = the GWAS output
gwas <- read.csv(file = "/home/bambrozi/2_CORTISOL/GWAS/GWAS_plus_BY_Samp/SNP_sig_Bonf_ind_seg.csv")
# Assuming "gwas" is your dataframe
gwas <- subset(gwas, select = c(Chr, SNP, bp))
colnames(gwas) <- c("CHR","SNP", "BP")
#FINDING GENES AND QTLs ARROUND THE MARKER
#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2,
marker_file= gwas,
method = "gene",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_genes2.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_genes2.csv")
#FINDING QTLs
out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2,
marker_file= gwas,
method = "qtl",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_qtl2.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_qtl_clean2.csv")
Dowloading the .gtf file from Ensembl https://useast.ensembl.org/info/data/ftp/index.html
Downloading the .gff file from AnimalQTLdb https://www.animalgenome.org/cgi-bin/QTLdb/index
The GALLO output are bellow:
For GENES
| X | CHR | SNP | BP | chr | start_pos | end_pos | width | strand | gene_id | gene_name | gene_biotype |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 22 | ARS-BFGL-NGS-26419 | 46052082 | 22 | 45924535 | 46818511 | 893977 | - | ENSBTAG00000013117 | CACNA2D3 | protein_coding |
| 2 | 22 | ARS-BFGL-NGS-26419 | 46052082 | 22 | 46051998 | 46062657 | 10660 | + | ENSBTAG00000013124 | LRTM1 | protein_coding |
| 3 | 22 | ARS-BFGL-NGS-23411 | 5134078 | 22 | 5091037 | 5184321 | 93285 | + | ENSBTAG00000019832 | TGFBR2 | protein_coding |
| 4 | 26 | BTB-00926636 | 17857480 | 26 | 17703169 | 17846407 | 143239 | - | ENSBTAG00000011743 | TLL2 | protein_coding |
| 5 | 26 | BTB-00926636 | 17857480 | 26 | 17851539 | 17912665 | 61127 | - | ENSBTAG00000016298 | TM9SF3 | protein_coding |
| 6 | 26 | BTA-62125-no-rs | 17899619 | 26 | 17851539 | 17912665 | 61127 | - | ENSBTAG00000016298 | TM9SF3 | protein_coding |
| 7 | 26 | BTA-62125-no-rs | 17899619 | 26 | 17925643 | 18029878 | 104236 | - | ENSBTAG00000019872 | PIK3AP1 | protein_coding |
| 8 | 28 | BTA-99379-no-rs | 41666186 | 28 | 41609015 | 41649016 | 40002 | - | ENSBTAG00000007540 | GLUD1 | protein_coding |
| 9 | 28 | BTA-99379-no-rs | 41666186 | 28 | 41650333 | 41763371 | 113039 | + | ENSBTAG00000019194 | SHLD2 | protein_coding |
FOR QTLs
| X | CHR | SNP | BP | QTL_type | start_pos | end_pos | QTL_ID |
|---|---|---|---|---|---|---|---|
| 1 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5109498 | 5109502 | 151609 |
| 2 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5134076 | 5134080 | 151620 |
| 3 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5173197 | 5173201 | 151601 |
| 4 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51802 |
| 5 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51803 |
| 6 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51804 |
| 7 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51805 |
| 8 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Production | 46052080 | 46052084 | 51806 |
| 9 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Production | 46052080 | 46052084 | 51807 |
| 10 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51808 |
| 11 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51809 |
| 12 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Health | 46052080 | 46052084 | 51810 |
| 13 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Reproduction | 46052080 | 46052084 | 51811 |
| 14 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51812 |
| 15 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Exterior | 46052080 | 46052084 | 51813 |
| 16 | 22 | ARS-BFGL-NGS-26419 | 46052082 | Milk | 46073517 | 46073521 | 243365 |
| 17 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 195264 |
| 18 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 200269 |
| 19 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 200270 |
| 20 | 26 | BTB-00926636 | 17857480 | Milk | 17809768 | 17809772 | 205295 |
| 21 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 198859 |
| 22 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 203868 |
| 23 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 203869 |
| 24 | 26 | BTB-00926636 | 17857480 | Milk | 17810363 | 17810367 | 210297 |
| 25 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 198858 |
| 26 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 203866 |
| 27 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 203867 |
| 28 | 26 | BTB-00926636 | 17857480 | Milk | 17811021 | 17811025 | 210296 |
| 29 | 26 | BTB-00926636 | 17857480 | Milk | 17811725 | 17811729 | 197493 |
| 30 | 26 | BTB-00926636 | 17857480 | Milk | 17811725 | 17811729 | 202704 |
| 31 | 26 | BTB-00926636 | 17857480 | Milk | 17812273 | 17812277 | 195685 |
| 32 | 26 | BTB-00926636 | 17857480 | Milk | 17812273 | 17812277 | 200785 |
| 33 | 26 | BTB-00926636 | 17857480 | Milk | 17813212 | 17813216 | 195285 |
| 34 | 26 | BTB-00926636 | 17857480 | Milk | 17813212 | 17813216 | 200301 |
| 35 | 26 | BTB-00926636 | 17857480 | Milk | 17813978 | 17813982 | 196236 |
| 36 | 26 | BTB-00926636 | 17857480 | Milk | 17813978 | 17813982 | 201468 |
| 37 | 26 | BTB-00926636 | 17857480 | Milk | 17815237 | 17815241 | 198002 |
| 38 | 26 | BTB-00926636 | 17857480 | Milk | 17815237 | 17815241 | 203190 |
| 39 | 26 | BTB-00926636 | 17857480 | Milk | 17817223 | 17817227 | 34750 |
| 40 | 26 | BTB-00926636 | 17857480 | Milk | 17817223 | 17817227 | 195586 |
| 41 | 26 | BTB-00926636 | 17857480 | Milk | 17821297 | 17821301 | 197235 |
| 42 | 26 | BTB-00926636 | 17857480 | Milk | 17821297 | 17821301 | 202455 |
| 43 | 26 | BTB-00926636 | 17857480 | Milk | 17822209 | 17822213 | 197783 |
| 44 | 26 | BTB-00926636 | 17857480 | Milk | 17822209 | 17822213 | 202983 |
| 45 | 26 | BTB-00926636 | 17857480 | Milk | 17823105 | 17823109 | 196204 |
| 46 | 26 | BTB-00926636 | 17857480 | Milk | 17823105 | 17823109 | 201419 |
| 47 | 26 | BTB-00926636 | 17857480 | Milk | 17824770 | 17824774 | 196867 |
| 48 | 26 | BTB-00926636 | 17857480 | Milk | 17824770 | 17824774 | 202101 |
| 49 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 195748 |
| 50 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 200855 |
| 51 | 26 | BTB-00926636 | 17857480 | Milk | 17833551 | 17833555 | 206213 |
| 52 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 197144 |
| 53 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 197144 |
| 54 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 202367 |
| 55 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 202367 |
| 56 | 26 | BTB-00926636 | 17857480 | Milk | 17865069 | 17865073 | 208485 |
| 57 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17865069 | 17865073 | 208485 |
| 58 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 197611 |
| 59 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 197611 |
| 60 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 202816 |
| 61 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 202816 |
| 62 | 26 | BTB-00926636 | 17857480 | Milk | 17871201 | 17871205 | 209076 |
| 63 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17871201 | 17871205 | 209076 |
| 64 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 63656 |
| 65 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 63656 |
| 66 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 199068 |
| 67 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 199068 |
| 68 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 204113 |
| 69 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 204113 |
| 70 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 204114 |
| 71 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 204114 |
| 72 | 26 | BTB-00926636 | 17857480 | Milk | 17885871 | 17885875 | 210399 |
| 73 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17885871 | 17885875 | 210399 |
| 74 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 63657 |
| 75 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 63657 |
| 76 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 199069 |
| 77 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 199069 |
| 78 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 204115 |
| 79 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 204115 |
| 80 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 204116 |
| 81 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 204116 |
| 82 | 26 | BTB-00926636 | 17857480 | Milk | 17895273 | 17895277 | 210400 |
| 83 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895273 | 17895277 | 210400 |
| 84 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 199070 |
| 85 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 199070 |
| 86 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 204117 |
| 87 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 204117 |
| 88 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 204118 |
| 89 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 204118 |
| 90 | 26 | BTB-00926636 | 17857480 | Milk | 17895803 | 17895807 | 210401 |
| 91 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17895803 | 17895807 | 210401 |
| 92 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 199071 |
| 93 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 199071 |
| 94 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 204119 |
| 95 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 204119 |
| 96 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 204120 |
| 97 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 204120 |
| 98 | 26 | BTB-00926636 | 17857480 | Milk | 17900304 | 17900308 | 210402 |
| 99 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17900304 | 17900308 | 210402 |
| 100 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 63658 |
| 101 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 63658 |
| 102 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 199072 |
| 103 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 199072 |
| 104 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 204121 |
| 105 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 204121 |
| 106 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 204122 |
| 107 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 204122 |
| 108 | 26 | BTB-00926636 | 17857480 | Milk | 17902932 | 17902936 | 210403 |
| 109 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17902932 | 17902936 | 210403 |
| 110 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 63660 |
| 111 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 63660 |
| 112 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 199074 |
| 113 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 199074 |
| 114 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 204125 |
| 115 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 204125 |
| 116 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 204126 |
| 117 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 204126 |
| 118 | 26 | BTB-00926636 | 17857480 | Milk | 17906861 | 17906865 | 210405 |
| 119 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17906861 | 17906865 | 210405 |
| 120 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 63654 |
| 121 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 199076 |
| 122 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 204129 |
| 123 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 204130 |
| 124 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17914357 | 17914361 | 210407 |
| 125 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 63655 |
| 126 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 197826 |
| 127 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 203026 |
| 128 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 203027 |
| 129 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17915872 | 17915876 | 209354 |
| 130 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 195860 |
| 131 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 201008 |
| 132 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 201009 |
| 133 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17922107 | 17922111 | 206462 |
| 134 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 33984 |
| 135 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195668 |
| 136 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195669 |
| 137 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 195670 |
| 138 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17928246 | 17928250 | 200758 |
| 139 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17931209 | 17931213 | 199092 |
| 140 | 26 | BTA-62125-no-rs | 17899619 | Milk | 17931209 | 17931213 | 204151 |
| 141 | 28 | BTA-99379-no-rs | 41666186 | Reproduction | 41646434 | 41646438 | 107048 |
| 142 | 28 | BTA-99379-no-rs | 41666186 | Reproduction | 41646434 | 41646438 | 107227 |
QTL type
#GALLO
par(mar=c(8,20,8,8))
plot_qtl_info(out.qtl, qtl_plot = "qtl_type", cex=1)
QTL type
#GALLO
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Reproduction")
par(mar=c(10,20,10,10))
plot_qtl_info(out.qtl, qtl_plot = "qtl_name", qtl_class="Health")
Reproduction
Health
#QTL enrichment analysis
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "Name",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]
write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_enrich_qtl_genome_name_2.csv")
out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "QTL_type",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/GALLO_BY_SD/out_enrich_qtl_genome_type_2.csv")
#Plots
#Name
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")
#Type
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")
QTL Enrichment outcomes
Enrichment by name (enrichment analysis will be performed for each trait individually)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval | QTL_type |
|---|---|---|---|---|---|---|---|---|
| 5 | Milk C14 index | 35 | 4437 | 108 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 9 | Milk myristoleic acid content | 26 | 3047 | 108 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 10 | Milk palmitoleic acid content | 15 | 2422 | 108 | 163224 | 0.0000000 | 0.0000000 | Milk |
| 6 | Milk C16 index | 12 | 2002 | 108 | 163224 | 0.0000000 | 0.0000001 | Milk |
| 12 | Muscle carnosine content | 3 | 67 | 108 | 163224 | 0.0000131 | 0.0000497 | Meat and Carcass |
| 14 | Pregnancy rate | 2 | 944 | 108 | 163224 | 0.1296627 | 0.4105985 | Reproduction |
| 18 | Teat length | 1 | 300 | 108 | 163224 | 0.1802436 | 0.4892327 | Exterior |
| 15 | Rear leg placement - side view | 1 | 430 | 108 | 163224 | 0.2479752 | 0.5235032 | Exterior |
| 17 | Stillbirth | 2 | 1363 | 108 | 163224 | 0.2280294 | 0.5235032 | Reproduction |
| 3 | Foot angle | 1 | 672 | 108 | 163224 | 0.3596272 | 0.6162877 | Exterior |
| 7 | Milk capric acid content | 1 | 912 | 108 | 163224 | 0.4541067 | 0.6162877 | Milk |
| 8 | Milk myristic acid content | 1 | 902 | 108 | 163224 | 0.4504612 | 0.6162877 | Milk |
| 13 | Net merit | 1 | 903 | 108 | 163224 | 0.4508269 | 0.6162877 | Production |
| 19 | Udder depth | 1 | 695 | 108 | 163224 | 0.3693423 | 0.6162877 | Exterior |
| 16 | Somatic cell score | 1 | 1122 | 108 | 163224 | 0.5253603 | 0.6654564 | Health |
| 2 | Conception rate | 1 | 1255 | 108 | 163224 | 0.5656375 | 0.6716945 | Reproduction |
| 1 | Calving ease | 2 | 3819 | 108 | 163224 | 0.7219243 | 0.7776744 | Reproduction |
| 4 | Length of productive life | 1 | 2004 | 108 | 163224 | 0.7367441 | 0.7776744 | Production |
| 11 | Milk yield | 1 | 6432 | 108 | 163224 | 0.9870080 | 0.9870080 | Milk |
Enrichment by QTL_type (enrichment processes performed for the QTL classes)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval |
|---|---|---|---|---|---|---|---|
| 1 | Exterior | 4 | 9077 | 108 | 163224 | 0.8569775 | 0.9999953 |
| 2 | Health | 1 | 5889 | 108 | 163224 | 0.9811250 | 0.9999953 |
| 3 | Meat and Carcass | 3 | 18258 | 108 | 163224 | 0.9997109 | 0.9999953 |
| 4 | Milk | 91 | 75352 | 108 | 163224 | 0.0000000 | 0.0000000 |
| 5 | Production | 2 | 19640 | 108 | 163224 | 0.9999848 | 0.9999953 |
| 6 | Reproduction | 7 | 35008 | 108 | 163224 | 0.9999953 | 0.9999953 |
From the online version of GPROFILER i got the following results.
Legend
To assess the correlation between Cortisol phenotypes and Genomic Estimated Breeding Values (GEBVs), we opt for a linear regression instead of a standard correlation test. This decision is driven by the non-normal distribution of our Cortisol phenotypes, which violates the assumptions required for traditional correlation tests.
Linear regression offers a robust alternative as it does not necessitate normality for the dependent variable. By regressing GEBVs over Cortisol, we can model the relationship between these variables. Our aim is to estimate the regression coefficient, which serves as our correlation estimate.
Due to the violation of normality assumptions for the dependent variable (Cortisol), traditional correlation tests may not provide reliable results, particularly in assessing the significance of the correlation. Therefore, alternative approaches, such as linear regression, are preferred as they do not require the same assumptions about the distribution of the dependent variable. By using linear regression, we can still assess the relationship between Cortisol and GEBVs while accommodating the non-normality of Cortisol phenotypes.
The regression model can be represented as follows: \[ y = \beta_0 + \beta_1 \times GEBV_{\text{Milk}} + \epsilon \]
Where:
This approach enables us to quantify the relationship between Cortisol and GEBVs, addressing the non-normality of Cortisol phenotypes while allowing for formal hypothesis testing of the correlation’s significance.
The first data I received from Lucas had only 135 animals out of 260 with values the other 125 had only NA I shown this to Lucas Lucas wrote to Alisson Lucas sent me the missing animals I merged this two files
rm(list = ls())
# Load the necessary library
library(dplyr)
library(tidyverse)
cortisol_260 <- read.csv("/home/bambrozi/2_CORTISOL/Data/data_clean.csv")
#This is the first dataframe with information for 135 animals and 125 NA
GEBVs1 <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebvs_elora.csv")
#This is the second file with information for the 125 NA animals
GEBVs2 <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/elora_missing_females_2404_06_11_2024.csv")
#This are de columns we can use because we know the meaning of the acronyms
GEBVs_to_use <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebv_names_lucas_06102024_BAG.csv")
sum(is.na(GEBVs1$MILK))
GEBVs1<- GEBVs1[which(is.na(GEBVs1[,"DHI_BARN_NAME"]) == F),]
sum(!is.na(GEBVs2$MILK))
GEBVs2<- GEBVs2[which(is.na(GEBVs2[,"DHI_BARN_NAME"]) == F),]
print(GEBVs1$DHI_BARN_NAME)
print(GEBVs2$DHI_BARN_NAME)
# Making the two dataframes with the same columns
# Remove elora_id and international_id from GEBVs1
GEBVs1 <- GEBVs1 %>% select(-elora_id, -international_id)
# Remove ANIMAL_ID from GEBVs2
GEBVs2 <- GEBVs2 %>% select(-ANIMAL_ID)
# Check if the two dataframes have the same columns
have_same_columns <- all(names(GEBVs1) == names(GEBVs2))
if (have_same_columns) {
print("The dataframes have the same columns.")
} else {
print("The dataframes do not have the same columns.")
}
# Check if the column names are in the same order
same_order <- identical(names(GEBVs1), names(GEBVs2))
if (same_order) {
print("The columns are in the same order.")
} else {
print("The columns are not in the same order.")
}
GEBVs_combined <- rbind(GEBVs1, GEBVs2)
# Sort the columns
sorted_cortisol_260 <- sort(cortisol_260$ID)
sorted_GEBVs_combined <- sort(GEBVs_combined$DHI_BARN_NAME)
# Check if the sorted columns have the same values
identical(sorted_cortisol_260, sorted_GEBVs_combined)
# Create a duplicate of the column 'DHI_BARN_NAME' and name it 'elora_id'
GEBVs_combined$elora_id <- GEBVs_combined$DHI_BARN_NAME
# Assuming GEBVs_combined is your data frame
GEBVs_combined <- GEBVs_combined %>%
select(elora_id, DHI_BARN_NAME, everything())
write.csv(GEBVs_combined, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/ebvs_elora_complete.csv")
# Merging the dataframe with Cortisol values, with the dataframe with GEBVs values
Merg_Cort_GEBVs <- merge(cortisol_260, GEBVs_combined, by.x = "ID", by.y = "elora_id")
write.csv(Merg_Cort_GEBVs, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/Merged_Cortisol_GEBVs.csv")
#Opening the file with the GEBVs columns to use
Columns_to_use <- readLines("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/traits_to_use.txt")
colnames(Merg_Cort_GEBVs)[405] <- "IDD"
data <- select(Merg_Cort_GEBVs, ID, T4Cortisol, BIRTH_YEAR, all_of(Columns_to_use))
# The data below has the the 55 GEBVs + Cortisol data + Birth Year
write.csv(data, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits.csv")
samp_date2 <- read.csv("/home/bambrozi/2_CORTISOL/Data/Elora animal_ids_kl_sampling_date.csv")
# Convert Sampling_date to Date using as.Date
samp_date$Sampling_date <- as.Date(samp_date$Sampling_date, format = "%m/%d/%Y")
table(samp_date$Sampling_date)
samp_date <- select(samp_date, Elora_id, Sampling_date)
# Check if data$ID and samp_dates$elora_id are identical in values and order
identical(data$ID, samp_date$Elora_id)
data_final <- merge(data, samp_date, by.x="ID", by.y="Elora_id")
data_final <- data_final %>%
select(ID, T4Cortisol, BIRTH_YEAR, Sampling_date, everything())
# The data below has the the 55 GEBVs + Cortisol data + Birth Year + Sampling data
write.csv(data_final, "/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv")
ps. I double checked by hand the select and merge process against the original tables received and is everything ok.
# Fit the linear regression model
model <- lm(T4Cortisol ~ MILK, data = data)
# Summarize the model to get the regression coefficients and statistical summary
model_summary <- summary(model)
# Extract the desired statistics
multiple_r_squared <- model_summary$r.squared
adjusted_r_squared <- model_summary$adj.r.squared
f_statistic <- model_summary$fstatistic[1]
p_value <- pf(model_summary$fstatistic[1], model_summary$fstatistic[2], model_summary$fstatistic[3], lower.tail = FALSE)
# Combine the statistics into a data frame
results <- data.frame(
Multiple_R_Squared = multiple_r_squared,
Adjusted_R_Squared = adjusted_r_squared,
F_Statistic = f_statistic,
P_Value = p_value
)
# Save the results to a CSV file
write.csv(results, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/regression_summary_milk.csv", row.names = FALSE)
# Plot the linear regression between T4Cortisol and MILK
ggplot(data, aes(x = MILK, y = T4Cortisol)) +
geom_point() + # Add points
geom_smooth(method = "lm", se = FALSE, color = "blue") + # Add linear regression line
labs(title = "Linear Regression of T4Cortisol on MILK",
x = "MILK",
y = "T4Cortisol") +
theme_minimal()
Plot linear regression
Cortisol vs. Milk GEBV
Summary statistics
| Multiple_R_Squared | Adjusted_R_Squared | F_Statistic | P_Value |
|---|---|---|---|
| 5.2e-05 | -0.0038237 | 0.0134257 | 0.9078463 |
As we have 54 GEBVs to fit a linear regression we designed a loop:
# Initialize a list to store the results
results_list <- list()
# Loop through columns 4 to ncol(data) for the GEBVs
for (i in 4:ncol(data)) {
trait_name <- colnames(data)[i]
# Fit the linear regression model
model <- lm(data[[2]] ~ data[[i]], data = data)
# Summarize the model
model_summary <- summary(model)
# Extract the desired statistics
multiple_r_squared <- model_summary$r.squared
adjusted_r_squared <- model_summary$adj.r.squared
f_statistic <- model_summary$fstatistic[1] # F-statistic value
f_num_df <- model_summary$fstatistic[2] # Numerator degrees of freedom
f_den_df <- model_summary$fstatistic[3] # Denominator degrees of freedom
p_value <- pf(f_statistic, f_num_df, f_den_df, lower.tail = FALSE) # P-value
# Extract the coefficient and its p-value for the trait
coef_summary <- coef(model_summary)
trait_coef <- coef_summary[2, "Estimate"] # Assumes the trait is the second predictor
trait_p_value <- coef_summary[2, "Pr(>|t|)"]
# Combine the statistics into a data frame
result <- data.frame(
Trait = trait_name,
Multiple_R_Squared = multiple_r_squared,
Adjusted_R_Squared = adjusted_r_squared,
F_Statistic = f_statistic,
P_Value = p_value,
Coefficient = trait_coef,
Coefficient_P_Value = trait_p_value
)
# Append the result to the results list
results_list[[i - 3]] <- result
}
# Combine all results into a single data frame
results_df_0 <- do.call(rbind, results_list)
# Save the results to a CSV file
write.csv(results_df, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/regression_summary_all_traits.csv", row.names = FALSE)
Summary statistics for all Traits’ GEBVs
| Trait | Multiple_R_Squared | Adjusted_R_Squared | F_Statistic | P_Value | Coefficient | Coefficient_P_Value |
|---|---|---|---|---|---|---|
| BMR | 0.0400473 | 0.0363266 | 10.7632421 | 0.0011781 | 21.1128882 | 0.0011781 |
| CO | 0.0280949 | 0.0243278 | 7.4580136 | 0.0067510 | -13.2769834 | 0.0067510 |
| MSL | 0.0198445 | 0.0160455 | 5.2235464 | 0.0230946 | -16.9454262 | 0.0230946 |
| CTFS | 0.0185227 | 0.0147186 | 4.8690531 | 0.0282237 | 15.8469990 | 0.0282237 |
| UT | 0.0166425 | 0.0128310 | 4.3664218 | 0.0376334 | -15.6741466 | 0.0376334 |
| MS | 0.0128559 | 0.0090298 | 3.3600230 | 0.0679494 | -14.7558389 | 0.0679494 |
| CK | 0.0106156 | 0.0067808 | 2.7682148 | 0.0973674 | 14.6866464 | 0.0973674 |
| IH | 0.0101222 | 0.0062855 | 2.6382359 | 0.1055404 | -7.3062481 | 0.1055404 |
| DD | 0.0100161 | 0.0061790 | 2.6103042 | 0.1073935 | -9.5352332 | 0.1073935 |
| CONF | 0.0095452 | 0.0057062 | 2.4863924 | 0.1160600 | -13.9681677 | 0.1160600 |
| UD | 0.0085295 | 0.0046866 | 2.2195470 | 0.1374945 | -15.7518165 | 0.1374945 |
| SU | 0.0077590 | 0.0039131 | 2.0174678 | 0.1567056 | 7.3716271 | 0.1567056 |
| MT | 0.0076274 | 0.0037810 | 1.9829901 | 0.1602796 | -10.6502458 | 0.1602796 |
| LP | 0.0074242 | 0.0035770 | 1.9297608 | 0.1659824 | 9.2112199 | 0.1659824 |
| MET | 0.0070642 | 0.0032156 | 1.8355367 | 0.1766601 | -7.6934858 | 0.1766601 |
| MR | 0.0066755 | 0.0028254 | 1.7338637 | 0.1890867 | -7.8752018 | 0.1890867 |
| DO | 0.0060791 | 0.0022267 | 1.5779921 | 0.2101864 | 9.5682702 | 0.2101864 |
| DHL | 0.0052895 | 0.0014340 | 1.3719419 | 0.2425591 | -9.4618477 | 0.2425591 |
| MOB | 0.0047368 | 0.0008792 | 1.2279220 | 0.2688435 | 7.8139790 | 0.2688435 |
| DF | 0.0046402 | 0.0007822 | 1.2027541 | 0.2737945 | 7.8603320 | 0.2737945 |
| FAT | 0.0046204 | 0.0007623 | 1.1975980 | 0.2748229 | 1.0090961 | 0.2748229 |
| SH | 0.0043235 | 0.0004643 | 1.1203076 | 0.2908422 | 5.2472606 | 0.2908422 |
| DS | 0.0043121 | 0.0004528 | 1.1173348 | 0.2914818 | -7.5550660 | 0.2914818 |
| FOOT | 0.0043005 | 0.0004412 | 1.1143288 | 0.2921304 | 26.7441735 | 0.2921304 |
| FA | 0.0042762 | 0.0004168 | 1.1079850 | 0.2935052 | -8.0817790 | 0.2935052 |
| PROT | 0.0040026 | 0.0001421 | 1.0368190 | 0.3095163 | 1.2915168 | 0.3095163 |
| WL | 0.0038262 | -0.0000350 | 0.9909414 | 0.3204451 | 5.2197508 | 0.3204451 |
| MDR | 0.0038096 | -0.0000516 | 0.9866366 | 0.3214965 | 7.0545996 | 0.3214965 |
| CA | 0.0036379 | -0.0002239 | 0.9420177 | 0.3326685 | 7.4603427 | 0.3326685 |
| ME | 0.0034176 | -0.0004452 | 0.8847561 | 0.3477821 | -5.0950658 | 0.3477821 |
| SCS | 0.0031318 | -0.0007321 | 0.8105371 | 0.3688011 | -4.8756313 | 0.3688011 |
| FE | 0.0028874 | -0.0009774 | 0.7470985 | 0.3881993 | -5.1774356 | 0.3881993 |
| FTP | 0.0027874 | -0.0010778 | 0.7211475 | 0.3965550 | 9.8351267 | 0.3965550 |
| AFS | 0.0025212 | -0.0013450 | 0.6521188 | 0.4201001 | -5.9185799 | 0.4201001 |
| UF | 0.0018686 | -0.0020001 | 0.4830008 | 0.4876916 | 6.8229142 | 0.4876916 |
| ST | 0.0018050 | -0.0020640 | 0.4665276 | 0.4952019 | -7.1228081 | 0.4952019 |
| MSP | 0.0014890 | -0.0023812 | 0.3847251 | 0.5356327 | -3.5290172 | 0.5356327 |
| HL | 0.0013205 | -0.0025503 | 0.3411423 | 0.5596809 | -4.3615962 | 0.5596809 |
| SCK | 0.0011411 | -0.0027305 | 0.2947310 | 0.5876733 | 3.3934702 | 0.5876733 |
| TL | 0.0009008 | -0.0029717 | 0.2326179 | 0.6299983 | -5.0172076 | 0.6299983 |
| LOC | 0.0008542 | -0.0030185 | 0.2205721 | 0.6390011 | -3.5628830 | 0.6390011 |
| DA | 0.0006826 | -0.0031907 | 0.1762328 | 0.6749803 | 3.5280623 | 0.6749803 |
| CW | 0.0005859 | -0.0032878 | 0.1512479 | 0.6976664 | -2.7721164 | 0.6976664 |
| BCS | 0.0003594 | -0.0035151 | 0.0927672 | 0.7609338 | 2.1412729 | 0.7609338 |
| RUM | 0.0003149 | -0.0035598 | 0.0812707 | 0.7758114 | -1.6952340 | 0.7758114 |
| TU | 0.0002204 | -0.0036547 | 0.0568793 | 0.8116875 | 1.0400512 | 0.8116875 |
| FL | 0.0001793 | -0.0036960 | 0.0462618 | 0.8298705 | 1.5296709 | 0.8298705 |
| BQ | 0.0001697 | -0.0037056 | 0.0437957 | 0.8343993 | -1.5434870 | 0.8343993 |
| HHE | 0.0000521 | -0.0038236 | 0.0134478 | 0.9077707 | 0.6475671 | 0.9077707 |
| MILK | 0.0000520 | -0.0038237 | 0.0134257 | 0.9078463 | 0.0042447 | 0.9078463 |
| HH | 0.0000509 | -0.0038249 | 0.0131340 | 0.9088482 | -0.6606061 | 0.9088482 |
| BD | 0.0000293 | -0.0038466 | 0.0075535 | 0.9308097 | -0.6433874 | 0.9308097 |
| DCA | 0.0000046 | -0.0038713 | 0.0011932 | 0.9724707 | -0.2685932 | 0.9724707 |
| FEED | 0.0000001 | -0.0038759 | 0.0000152 | 0.9968903 | -0.0190613 | 0.9968903 |
The regression model added the BY is shown bellow:
\[ y = \beta_0 + \beta_1 \times GEBV_{\text{Trait}} + BIRTH\_YEAR + \epsilon \]
Where:
The BIRTH_YEAR variable is converted to a factor to
account for the categorical nature of birth years.
# Convert BIRTH_YEAR to a factor and rename
data$BIRTH_YEAR <- as.factor(data$BIRTH_YEAR)
# Initialize a list to store the results
results_list <- list()
# Loop through columns 3 to ncol(data) for the GEBVs
for (i in 4:ncol(data)) {
trait_name <- colnames(data)[i]
# Fit the linear regression model with BIRTH_YEAR as an additional predictor
model <- lm(data[[2]] ~ data[[i]] + BIRTH_YEAR, data = data)
# Summarize the model
model_summary <- summary(model)
# Extract the desired statistics
multiple_r_squared <- model_summary$r.squared
adjusted_r_squared <- model_summary$adj.r.squared
f_statistic <- model_summary$fstatistic[1] # F-statistic value
f_num_df <- model_summary$fstatistic[2] # Numerator degrees of freedom
f_den_df <- model_summary$fstatistic[3] # Denominator degrees of freedom
p_value <- pf(f_statistic, f_num_df, f_den_df, lower.tail = FALSE) # P-value
# Extract the coefficient and its p-value for the trait
coef_summary <- coef(model_summary)
trait_coef <- coef_summary[2, "Estimate"] # Assumes the trait is the second predictor
trait_p_value <- coef_summary[2, "Pr(>|t|)"]
# Combine the statistics into a data frame
result <- data.frame(
Trait = trait_name,
Multiple_R_Squared = multiple_r_squared,
Adjusted_R_Squared = adjusted_r_squared,
F_Statistic = f_statistic,
P_Value = p_value,
Coefficient = trait_coef,
Coefficient_P_Value = trait_p_value
)
# Append the result to the results list
results_list[[i - 2]] <- result
}
# Combine all results into a single data frame
results_df <- do.call(rbind, results_list)
# Save the results to a CSV file
write.csv(results_df, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/regression_summary_all_traits_BY.csv", row.names = FALSE)
Summary statistics for all Traits’ GEBVs adding BIRTY_YEAR
| Trait | Multiple_R_Squared | Adjusted_R_Squared | F_Statistic | P_Value | Coefficient | Coefficient_P_Value |
|---|---|---|---|---|---|---|
| BMR | 0.0881915 | 0.0738886 | 6.165997 | 0.0000945 | 19.1527245 | 0.0028131 |
| CO | 0.0803421 | 0.0659161 | 5.569252 | 0.0002590 | -12.5790675 | 0.0094097 |
| MSL | 0.0734487 | 0.0589145 | 5.053527 | 0.0006186 | -16.2613977 | 0.0277614 |
| IH | 0.0719150 | 0.0573568 | 4.939831 | 0.0007494 | -9.3449051 | 0.0354801 |
| CTFS | 0.0705440 | 0.0559642 | 4.838504 | 0.0008891 | 14.3231800 | 0.0442704 |
| UT | 0.0704128 | 0.0558310 | 4.828827 | 0.0009037 | -14.8606501 | 0.0452229 |
| MS | 0.0686230 | 0.0540132 | 4.697043 | 0.0011284 | -14.8462730 | 0.0606040 |
| CK | 0.0670600 | 0.0524256 | 4.582365 | 0.0013689 | 15.4450943 | 0.0785803 |
| PROT | 0.0658116 | 0.0511577 | 4.491053 | 0.0015963 | 2.1108214 | 0.0970292 |
| DD | 0.0638377 | 0.0491528 | 4.347166 | 0.0020332 | -8.6326827 | 0.1365405 |
| FAT | 0.0628176 | 0.0481167 | 4.273045 | 0.0023028 | 1.2796392 | 0.1637385 |
| SU | 0.0623970 | 0.0476895 | 4.242529 | 0.0024239 | 7.0165690 | 0.1767005 |
| CONF | 0.0621537 | 0.0474424 | 4.224892 | 0.0024968 | -11.6231634 | 0.1847326 |
| MET | 0.0618359 | 0.0471196 | 4.201861 | 0.0025952 | -7.2376432 | 0.1958719 |
| MR | 0.0617517 | 0.0470341 | 4.195767 | 0.0026218 | -7.5588944 | 0.1989504 |
| FOOT | 0.0614269 | 0.0467042 | 4.172251 | 0.0027273 | 31.1509094 | 0.2113774 |
| MOB | 0.0613973 | 0.0466741 | 4.170111 | 0.0027372 | 8.6217376 | 0.2125534 |
| LP | 0.0613481 | 0.0466241 | 4.166549 | 0.0027536 | 8.0864569 | 0.2145285 |
| FTP | 0.0608284 | 0.0460963 | 4.128971 | 0.0029327 | 13.5010333 | 0.2367661 |
| DO | 0.0607904 | 0.0460577 | 4.126220 | 0.0029463 | 8.8960747 | 0.2385010 |
| ME | 0.0607825 | 0.0460496 | 4.125648 | 0.0029491 | -6.2944604 | 0.2388636 |
| UD | 0.0606216 | 0.0458862 | 4.114024 | 0.0030071 | -12.1091142 | 0.2463830 |
| FA | 0.0605549 | 0.0458185 | 4.109209 | 0.0030315 | -8.6648464 | 0.2495819 |
| MT | 0.0601784 | 0.0454361 | 4.082026 | 0.0031728 | -8.2421107 | 0.2686411 |
| CA | 0.0599002 | 0.0451535 | 4.061948 | 0.0032814 | 8.3011337 | 0.2838987 |
| DF | 0.0598369 | 0.0450892 | 4.057381 | 0.0033066 | 7.4854663 | 0.2875213 |
| DHL | 0.0595211 | 0.0447684 | 4.034611 | 0.0034352 | -8.1319072 | 0.3064961 |
| SH | 0.0586677 | 0.0439017 | 3.973163 | 0.0038074 | 4.4345159 | 0.3666760 |
| MILK | 0.0585384 | 0.0437704 | 3.963861 | 0.0038671 | 0.0328321 | 0.3771595 |
| MDR | 0.0584166 | 0.0436466 | 3.955101 | 0.0039243 | 6.0467308 | 0.3874240 |
| ST | 0.0578881 | 0.0431099 | 3.917120 | 0.0041817 | -7.9781463 | 0.4369789 |
| SCS | 0.0576964 | 0.0429152 | 3.903356 | 0.0042790 | -3.9614430 | 0.4573264 |
| AFS | 0.0575918 | 0.0428089 | 3.895845 | 0.0043331 | -5.2064660 | 0.4690656 |
| MSP | 0.0575087 | 0.0427245 | 3.889880 | 0.0043765 | -3.9533312 | 0.4787399 |
| DS | 0.0573283 | 0.0425413 | 3.876936 | 0.0044723 | -4.7841746 | 0.5009026 |
| WL | 0.0572605 | 0.0424724 | 3.872073 | 0.0045088 | 3.4479700 | 0.5096796 |
| TL | 0.0572547 | 0.0424665 | 3.871655 | 0.0045119 | -6.7563254 | 0.5104479 |
| FE | 0.0572054 | 0.0424165 | 3.868123 | 0.0045386 | -3.8209975 | 0.5170084 |
| DA | 0.0566761 | 0.0418789 | 3.830184 | 0.0048356 | 4.3714958 | 0.5986609 |
| HHE | 0.0565353 | 0.0417358 | 3.820095 | 0.0049178 | 2.6941677 | 0.6249126 |
| HL | 0.0563327 | 0.0415301 | 3.805592 | 0.0050384 | -3.1669335 | 0.6676242 |
| LOC | 0.0562600 | 0.0414563 | 3.800388 | 0.0050823 | -3.0195770 | 0.6847865 |
| FL | 0.0562527 | 0.0414489 | 3.799865 | 0.0050868 | 2.8115963 | 0.6865743 |
| SCK | 0.0560192 | 0.0412117 | 3.783153 | 0.0052307 | 1.9379506 | 0.7520160 |
| CW | 0.0559471 | 0.0411384 | 3.777995 | 0.0052759 | -1.9950607 | 0.7767448 |
| FEED | 0.0559443 | 0.0411356 | 3.777797 | 0.0052777 | -1.3559978 | 0.7777588 |
| BQ | 0.0559195 | 0.0411104 | 3.776021 | 0.0052933 | -1.9570851 | 0.7870649 |
| TU | 0.0558834 | 0.0410737 | 3.773438 | 0.0053162 | 1.0731908 | 0.8014578 |
| UF | 0.0558449 | 0.0410347 | 3.770689 | 0.0053406 | 2.2354069 | 0.8181403 |
| DCA | 0.0557115 | 0.0408991 | 3.761148 | 0.0054263 | -1.0069789 | 0.8965483 |
| BD | 0.0557011 | 0.0408885 | 3.760401 | 0.0054331 | 0.8702554 | 0.9055126 |
| BCS | 0.0556786 | 0.0408657 | 3.758795 | 0.0054477 | 0.6193741 | 0.9285710 |
| RUM | 0.0556595 | 0.0408464 | 3.757433 | 0.0054601 | -0.3154634 | 0.9570449 |
| HH | 0.0556561 | 0.0408428 | 3.757185 | 0.0054623 | -0.2539972 | 0.9646162 |
Below we can check the improvement in the percentage of variation in Cortisol explained by the Trait_GEBVs + Birth_Year.
CORR_BY <- read.csv("/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/regression_summary_all_traits_BY.csv")
CORR_MIN <- read.csv("/home/bambrozi/2_CORTISOL/Correlation/Results/regression_summary_all_traits.csv")
CORR_BY <- CORR_BY[, c("Trait", "Adjusted_R_Squared"), drop = FALSE]
CORR_MIN <- CORR_MIN[, c("Trait", "Adjusted_R_Squared"), drop = FALSE]
colnames(CORR_BY)[colnames(CORR_BY) == "Adjusted_R_Squared"] <- "Adjusted_R_Squared_BY"
colnames(CORR_MIN)[colnames(CORR_MIN) == "Adjusted_R_Squared"] <- "Adjusted_R_Squared_MIN"
Corr_R2_comp <- merge(CORR_MIN, CORR_BY, by.x = "Trait", by.y = "Trait")
Corr_R2_comp$Absolute_Change <- Corr_R2_comp$Adjusted_R_Squared_BY - Corr_R2_comp$Adjusted_R_Squared_MIN
write.csv(Corr_R2_comp, "/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/absolute_improv_r2.csv")
| X | Trait | Adjusted_R_Squared_Model_MIN | Adjusted_R_Squared_MIN.BY | Absolute_Change |
|---|---|---|---|---|
| 1 | AFS | -0.0013450 | 0.0428089 | 0.0441539 |
| 2 | BCS | -0.0035151 | 0.0408657 | 0.0443809 |
| 3 | BD | -0.0038466 | 0.0408885 | 0.0447351 |
| 4 | BMR | 0.0363266 | 0.0738886 | 0.0375621 |
| 5 | BQ | -0.0037056 | 0.0411104 | 0.0448160 |
| 6 | CA | -0.0002239 | 0.0451535 | 0.0453774 |
| 7 | CK | 0.0067808 | 0.0524256 | 0.0456448 |
| 8 | CO | 0.0243278 | 0.0659161 | 0.0415882 |
| 9 | CONF | 0.0057062 | 0.0474424 | 0.0417362 |
| 10 | CTFS | 0.0147186 | 0.0559642 | 0.0412457 |
| 11 | CW | -0.0032878 | 0.0411384 | 0.0444262 |
| 12 | DA | -0.0031907 | 0.0418789 | 0.0450696 |
| 13 | DCA | -0.0038713 | 0.0408991 | 0.0447705 |
| 14 | DD | 0.0061790 | 0.0491528 | 0.0429738 |
| 15 | DF | 0.0007822 | 0.0450892 | 0.0443070 |
| 16 | DHL | 0.0014340 | 0.0447684 | 0.0433344 |
| 17 | DO | 0.0022267 | 0.0460577 | 0.0438310 |
| 18 | DS | 0.0004528 | 0.0425413 | 0.0420884 |
| 19 | FA | 0.0004168 | 0.0458185 | 0.0454018 |
| 20 | FAT | 0.0007623 | 0.0481167 | 0.0473544 |
| 21 | FE | -0.0009774 | 0.0424165 | 0.0433939 |
| 22 | FEED | -0.0038759 | 0.0411356 | 0.0450115 |
| 23 | FL | -0.0036960 | 0.0414489 | 0.0451449 |
| 24 | FOOT | 0.0004412 | 0.0467042 | 0.0462629 |
| 25 | FTP | -0.0010778 | 0.0460963 | 0.0471741 |
| 26 | HH | -0.0038249 | 0.0408428 | 0.0446677 |
| 27 | HHE | -0.0038236 | 0.0417358 | 0.0455595 |
| 28 | HL | -0.0025503 | 0.0415301 | 0.0440805 |
| 29 | IH | 0.0062855 | 0.0573568 | 0.0510713 |
| 30 | LOC | -0.0030185 | 0.0414563 | 0.0444747 |
| 31 | LP | 0.0035770 | 0.0466241 | 0.0430471 |
| 32 | MDR | -0.0000516 | 0.0436466 | 0.0436982 |
| 33 | ME | -0.0004452 | 0.0460496 | 0.0464948 |
| 34 | MET | 0.0032156 | 0.0471196 | 0.0439039 |
| 35 | MILK | -0.0038237 | 0.0437704 | 0.0475941 |
| 36 | MOB | 0.0008792 | 0.0466741 | 0.0457949 |
| 37 | MR | 0.0028254 | 0.0470341 | 0.0442086 |
| 38 | MS | 0.0090298 | 0.0540132 | 0.0449834 |
| 39 | MSL | 0.0160455 | 0.0589145 | 0.0428690 |
| 40 | MSP | -0.0023812 | 0.0427245 | 0.0451057 |
| 41 | MT | 0.0037810 | 0.0454361 | 0.0416552 |
| 42 | PROT | 0.0001421 | 0.0511577 | 0.0510155 |
| 43 | RUM | -0.0035598 | 0.0408464 | 0.0444062 |
| 44 | SCK | -0.0027305 | 0.0412117 | 0.0439421 |
| 45 | SCS | -0.0007321 | 0.0429152 | 0.0436472 |
| 46 | SH | 0.0004643 | 0.0439017 | 0.0434374 |
| 47 | ST | -0.0020640 | 0.0431099 | 0.0451738 |
| 48 | SU | 0.0039131 | 0.0476895 | 0.0437764 |
| 49 | TL | -0.0029717 | 0.0424665 | 0.0454382 |
| 50 | TU | -0.0036547 | 0.0410737 | 0.0447284 |
| 51 | UD | 0.0046866 | 0.0458862 | 0.0411996 |
| 52 | UF | -0.0020001 | 0.0410347 | 0.0430348 |
| 53 | UT | 0.0128310 | 0.0558310 | 0.0430000 |
| 54 | WL | -0.0000350 | 0.0424724 | 0.0425074 |
Adjusted_R_Squared_Model_MIN = Model without Birth
Year
Adjusted_R_Squared_MIN.BY = Model adding Birth Year
The regression model added the BY and SAMPLING DATE is shown bellow:
\[ y = \beta_0 + \beta_1 \times GEBV_{\text{Trait}} + BIRTH\_YEAR + SAMPLING\_DATE + \epsilon \]
Where:
The SAMPLING_DATE variable is also converted to a factor
to account for the categorical nature of sampling date.
# Convert BIRTH_YEAR to a factor and rename
data_final$BIRTH_YEAR <- as.factor(data_final$BIRTH_YEAR)
# Convert Sampling_data to a factor and rename
data_final$Sampling_date <- as.factor(data_final$Sampling_date)
# Initialize a list to store the results
results_list <- list()
# Loop through columns 3 to ncol(data) for the GEBVs
for (i in 5:ncol(data_final)) {
trait_name <- colnames(data_final)[i]
# Fit the linear regression model with BIRTH_YEAR as an additional predictor
model <- lm(data_final[[2]] ~ data_final[[i]] + data_final$BIRTH_YEAR + data_final$Sampling_date , data = data_final)
# Summarize the model
model_summary <- summary(model)
# Extract the desired statistics
multiple_r_squared <- model_summary$r.squared
adjusted_r_squared <- model_summary$adj.r.squared
f_statistic <- model_summary$fstatistic[1] # F-statistic value
f_num_df <- model_summary$fstatistic[2] # Numerator degrees of freedom
f_den_df <- model_summary$fstatistic[3] # Denominator degrees of freedom
p_value <- pf(f_statistic, f_num_df, f_den_df, lower.tail = FALSE) # P-value
# Extract the coefficient and its p-value for the trait
coef_summary <- coef(model_summary)
trait_coef <- coef_summary[2, "Estimate"] # Assumes the trait is the second predictor
trait_p_value <- coef_summary[2, "Pr(>|t|)"]
# Combine the statistics into a data frame
result <- data.frame(
Trait = trait_name,
Multiple_R_Squared = multiple_r_squared,
Adjusted_R_Squared = adjusted_r_squared,
F_Statistic = f_statistic,
P_Value = p_value,
Coefficient = trait_coef,
Coefficient_P_Value = trait_p_value
)
# Append the result to the results list
results_list[[i - 2]] <- result
}
# Combine all results into a single data frame
results_df <- do.call(rbind, results_list)
# Save the results to a CSV file
write.csv(results_df, file = "/home/bambrozi/2_CORTISOL/Correlation/Results/add_BY/regression_summary_all_traits_BY.csv", row.names = FALSE)
Summary statistics for all Traits’ GEBVs adding BIRTY_YEAR and SAMPLING_DATE
| Trait | Multiple_R_Squared | Adjusted_R_Squared | F_Statistic | P_Value | Coefficient | Coefficient_P_Value |
|---|---|---|---|---|---|---|
| CO | 0.3913617 | 0.3257757 | 5.967151 | 0 | -11.1849879 | 0.0099861 |
| BMR | 0.3899307 | 0.3241904 | 5.931386 | 0 | 14.4612444 | 0.0135665 |
| LP | 0.3858330 | 0.3196512 | 5.829896 | 0 | 12.2619950 | 0.0330355 |
| MILK | 0.3850052 | 0.3187343 | 5.809559 | 0 | 0.0671160 | 0.0396652 |
| PROT | 0.3841834 | 0.3178238 | 5.789422 | 0 | 2.2166395 | 0.0476301 |
| UT | 0.3822859 | 0.3157218 | 5.743130 | 0 | -12.1238483 | 0.0731558 |
| CK | 0.3801906 | 0.3134008 | 5.692345 | 0 | 12.7333709 | 0.1192726 |
| HHE | 0.3795579 | 0.3126999 | 5.677076 | 0 | 7.4003505 | 0.1388525 |
| MSP | 0.3793006 | 0.3124149 | 5.670876 | 0 | -7.2388952 | 0.1478152 |
| DA | 0.3791391 | 0.3122360 | 5.666989 | 0 | 10.8037510 | 0.1537699 |
| TU | 0.3785722 | 0.3116080 | 5.653351 | 0 | -5.2064639 | 0.1769405 |
| MSL | 0.3777098 | 0.3106526 | 5.632656 | 0 | -8.6230634 | 0.2203509 |
| BQ | 0.3775224 | 0.3104450 | 5.628166 | 0 | -7.7284891 | 0.2313766 |
| IH | 0.3772456 | 0.3101385 | 5.621542 | 0 | -4.7354916 | 0.2488963 |
| ST | 0.3767980 | 0.3096426 | 5.610839 | 0 | -9.9721644 | 0.2808121 |
| LOC | 0.3766890 | 0.3095218 | 5.608233 | 0 | -7.2367609 | 0.2893509 |
| FAT | 0.3765584 | 0.3093772 | 5.605116 | 0 | 0.8520581 | 0.3000062 |
| UD | 0.3763515 | 0.3091480 | 5.600177 | 0 | -9.3297651 | 0.3179533 |
| CA | 0.3757417 | 0.3084725 | 5.585641 | 0 | 6.0756898 | 0.3798799 |
| MS | 0.3755038 | 0.3082090 | 5.579979 | 0 | -6.0375749 | 0.4085912 |
| SCK | 0.3754363 | 0.3081342 | 5.578372 | 0 | -4.4161473 | 0.4173165 |
| SCS | 0.3754033 | 0.3080976 | 5.577587 | 0 | 3.9080681 | 0.4216769 |
| IDD | 0.3752677 | 0.3079474 | 5.574362 | 0 | 3.7029601 | 0.4403519 |
| FOOT | 0.3751830 | 0.3078536 | 5.572349 | 0 | 17.3309360 | 0.4526548 |
| TL | 0.3750803 | 0.3077398 | 5.569908 | 0 | -6.6585204 | 0.4683140 |
| MET | 0.3749055 | 0.3075462 | 5.565755 | 0 | -3.4014191 | 0.4970656 |
| CTFS | 0.3747600 | 0.3073850 | 5.562300 | 0 | 4.0557716 | 0.5233387 |
| CONF | 0.3746137 | 0.3072229 | 5.558828 | 0 | -4.7896643 | 0.5523352 |
| FE | 0.3745078 | 0.3071056 | 5.556316 | 0 | -2.9375586 | 0.5752627 |
| HL | 0.3743988 | 0.3069849 | 5.553731 | 0 | 3.4274416 | 0.6009102 |
| FA | 0.3742870 | 0.3068610 | 5.551080 | 0 | -3.2821641 | 0.6298652 |
| DD | 0.3742836 | 0.3068573 | 5.551001 | 0 | 2.5402141 | 0.6307730 |
| MOB | 0.3742556 | 0.3068263 | 5.550337 | 0 | 3.0592146 | 0.6385442 |
| FTP | 0.3742359 | 0.3068045 | 5.549870 | 0 | 4.8583743 | 0.6441419 |
| BCS | 0.3741681 | 0.3067293 | 5.548263 | 0 | 2.6692352 | 0.6643615 |
| HH | 0.3740753 | 0.3066266 | 5.546066 | 0 | 2.0230249 | 0.6947785 |
| DF | 0.3740392 | 0.3065865 | 5.545209 | 0 | 2.3681806 | 0.7076996 |
| FL | 0.3739824 | 0.3065237 | 5.543865 | 0 | -2.1711035 | 0.7294613 |
| UF | 0.3739605 | 0.3064994 | 5.543347 | 0 | 2.8880683 | 0.7384413 |
| MDR | 0.3738855 | 0.3064163 | 5.541571 | 0 | 1.8495373 | 0.7722558 |
| WL | 0.3738548 | 0.3063822 | 5.540843 | 0 | 1.2643028 | 0.7878801 |
| AFS | 0.3738293 | 0.3063540 | 5.540239 | 0 | 1.5814380 | 0.8018670 |
| RUM | 0.3738023 | 0.3063241 | 5.539601 | 0 | -1.2574561 | 0.8179113 |
| SH | 0.3738015 | 0.3063233 | 5.539583 | 0 | 1.0342843 | 0.8184040 |
| BD | 0.3736958 | 0.3062061 | 5.537080 | 0 | 0.7613199 | 0.9071017 |
| MR | 0.3736957 | 0.3062060 | 5.537078 | 0 | 0.6291752 | 0.9071898 |
| SU | 0.3736871 | 0.3061965 | 5.536876 | 0 | 0.4745538 | 0.9186634 |
| FEED | 0.3736818 | 0.3061907 | 5.536751 | 0 | 0.3944746 | 0.9266753 |
| DHL | 0.3736774 | 0.3061858 | 5.536647 | 0 | -0.5919776 | 0.9340753 |
| DO | 0.3736757 | 0.3061838 | 5.536604 | 0 | 0.5220723 | 0.9373261 |
| DS | 0.3736695 | 0.3061769 | 5.536458 | 0 | 0.3979132 | 0.9502668 |
| CW | 0.3736676 | 0.3061749 | 5.536414 | 0 | 0.3562368 | 0.9547808 |
| ME | 0.3736658 | 0.3061729 | 5.536370 | 0 | -0.2401976 | 0.9599012 |
| MT | 0.3736595 | 0.3061659 | 5.536223 | 0 | -0.0976386 | 0.9882642 |
| DCA | 0.3736593 | 0.3061657 | 5.536217 | 0 | 0.0802555 | 0.9906432 |
Preparing files to run Variance components estimation using REML with AI (Average Information) algorithm.
First you need to create a directory in your home directory, prepare and save the following files in:
The appearance of this file is like this:
FIRST COLUMN = Animal ID SECOND COLUMN = Phenotype THIRD COLUMN = Fixed Effect 1 FOURTH COLUMN = Fixed Effect 2
To get in one file these four columns we need the following code:
#File with equivalence among different ids
eq_ids <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv")
# Genotype file with cid
geno <- read.table("/home/bambrozi/2_CORTISOL/Geno_files/genoplink.ped")
# Phenotipic file and fixed effects
data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv")
# creating a pheno file with only ID, Cortisol, BY and Sam date columns
pheno <- data_final %>%
select(ID, T4Cortisol, BIRTH_YEAR, Sampling_date)
# Create a new column iid and and bring the iid from eq_ids to geno file
pheno$iid <- eq_ids$iid[match(pheno$ID, eq_ids$elora_id)]
# organizing columns sequence and keep only iid
pheno <- pheno%>%
select(iid, T4Cortisol, BIRTH_YEAR, Sampling_date)
# Create a new column geno$iid, and bring the iid from eq_ids to geno file
geno$iid <- eq_ids$iid[match(geno$V2, eq_ids$cdn_id)]
# organizing the columns sequence
library(dplyr)
geno <- geno %>%
select(V1, V2, iid, everything())
# Keeping in the pheno file only the rows present also in geno file
pheno <- pheno %>%
filter(iid %in% geno$iid)
write.table(pheno, "/home/bambrozi/2_CORTISOL/Heritability_BLUPF90/pheno_fix_eff.txt", sep = " ", col.names = FALSE, row.names = FALSE, quote = FALSE)
The file should be saved as text file, with separation by space and no columns names.
The appearance of this file is like this:
FIRST COLUMN = Animal ID SECOND COLUMN = Sire ID THIRD COLUMN = Dam ID
The file should be saved as text file, with separation by space and no columns names.
We used the code below to remove the commas of a .csv file to a file with sepation by spaces.
# to replace comma for space in the .csv file with the equivalence among IDs
sed -i 's/,/ /g' bruno_ids.csv
The appearance of this file is like this:
FIRST COLUMN = Animal ID SECOND COLUMN = Genotypes (0, 1 and 2 format)
The file should be saved as text file, with separation by space and no columns names.
We used the code below to replace the cid for iid. First we merge using the second column of the firs file, and the first column of the second file. Then we use again the command awk to keep only the third and fifth columsn and sabe in a different object.
# Using the awk function to merge the two files and the second awk to select only the 3rd and 5fh columns
awk 'FNR==NR {a[$2]=$0; next} {print a[$1], $0}' bruno_ids.csv bruno_gntps.txt | awk '{print$3,$5}' > bruno_gntps_iid
The appearance of this file is like this:
Go to the server you wanna run this analysis, for instance, grand
Now go to the directory you have created to run this analysis where that set of files are placed.
ssh grand
chmod +x renumf90
chmod +x blupf90+
./renumf90
When you run the code above, it will as you the name of your parameter card.
renumF90 will generate a new parameter card called renf90.par
./blupf90+
blupf90+ will ask you for parameter’s card name, now you should provide with the new one renf90.par
blupF90+ will generate the blupf90.log file with the results.
Now you should update you renf90.par file with these informations from the .log file
Copy Residual Variance from blupf90.log and will paste on renf90.par RANDOM_RESIDUAL_VALUES Copy Genetic variance for effect x from blupf90.log and will paste on renf90.par (CO) VARIANCE
./blupf90+
blupf90+ will ask you for parameter’s card name, now you should provide with the UPDATED renf90.par
If the Residual Variance and Genetic variance for effect x didn’t change in your blupf90.log the analysis ended, but if this value vary, you should update again the renf90.par and run again blupf90+ until this values don’t change more.
The previous analysis considered only the pedigree, but now we can insert the genotype information. To perform this you need a new diretory called Blup_Genomic inside your previously created directory.
Now you need add the reference for your genotype file in your previous parameter file renum.par and save in this new sub-directory.
The highlighted text show the added part.
Go to the sub-diretory
Note that as you are in the subdirectory, but your phenotype and fixed effect, pedigree and genotype files are still in the previous directory you need to add the highlighted part to inform the correct location
To run the renumf90 and blupf90+ you also need to add ../ to correct specify the location. Run renumF90
./../renumf90
Run blupf90+
./../blupf90+
The steps for run, update parameter card, re-run are the same.
We have 2 different output files
In this file we can find the heritabilit (SD) and for instance the convergence (similarity)
In our example:
EFFECT 3: Animal random effect, has one for each animal and it is the EBV or GEBV.
I received from Umesh a e-mail informing the three categories that the animals could be sorted based on their cortisol concentration.
pheno <- read.table("/home/bambrozi/2_CORTISOL/GWAS/pheno_genotyped.txt", header = T)
data_final <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/GEBVs_Elora/data_GEBVs_Cortisol_select_traits2.csv", header = T)
ids_eq <- read.csv("/home/bambrozi/2_CORTISOL/RawFiles/Pedigree/bruno_ids.csv", header = T)
# Create an matrix with fixed effects with only those animals which also have phenotype and genotype
data_final$cdn_id <- ids_eq$cdn_id[match(data_final$ID, ids_eq$elora_id)]
fixeff <- data_final[,c("ID", "BIRTH_YEAR", "Sampling_date", "cdn_id")]
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id, ]
fixeff$FID <- "HO"
fixeff <- fixeff[, c("FID", "cdn_id", "BIRTH_YEAR", "Sampling_date")]
# Now we are gona remove the intermediary animals from pheno object
pheno$Categorical <- ifelse(pheno$cortisol >= 956, "H",
ifelse(pheno$cortisol <= 190.8, "L", "M"))
table(pheno$Categorical)
pheno <- pheno[pheno$Categorical != "M", ]
pheno <- pheno[, c("FID", "cdn_id", "cortisol")]
# Now we are going to remove from fixeff the animals which are not in pheno
fixeff <- fixeff[fixeff$cdn_id %in% pheno$cdn_id,]
#Checking if match the animals and order
identical(fixeff$cdn_id, pheno$cdn_id)
#Creating a file with animals to keep in the genotype file, we will use it on Plink
to_keep_geno <- pheno[, c("FID", "cdn_id")]
write.table(fixeff, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt", quote = F, row.names = F, col.names = T)
write.table(pheno, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt", quote = F, row.names = F, col.names = T)
write.table(to_keep_geno, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/to_keep_geno.txt", quote = F, row.names = F, col.names = F)
We ended up with
H (Hight) = 34 animals
L (Low) = 37 animals Total = 71 animals
On Plink we will remove all individuals from genotype files that are classified as Medium, keeping only the High and Low
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/Geno_files_after_QC/genoplink_clean
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
KEEP=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/to_keep_geno.txt
plink2 --bfile ${DATA} --keep ${KEEP} --chr-set 29 --make-bed --out ${RESULT}
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/GWAS_result
PHENO=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt
FIXEFF=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt
/home/local/bin/gcta \
--bfile ${DATA} \
--mlma-loco \
--pheno ${PHENO} \
--qcovar ${FIXEFF} \
--autosome-num 29 \
--autosome \
--out ${RESULT}
After ran the GWAS above I got the following message from the GCTA:
Error: analysis stopped because more than half of the variance components are constrained. The result would be unreliable. You may have a try of the option –reml-no-constrain.
Than a ran this new code with NO CONSTRAIN
#!/bin/bash
DATA=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/geno_extreme
RESULT=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/GWAS_result_NO_CONSTRAIN
PHENO=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/pheno.txt
FIXEFF=/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/fixeff.txt
/home/local/bin/gcta \
--bfile ${DATA} \
--reml-no-constrain \
--mlma-loco \
--pheno ${PHENO} \
--qcovar ${FIXEFF} \
--autosome-num 29 \
--autosome \
--out ${RESULT}
Now we are going to apply a correction for multiple testing modifying the Bonferroni test adjusting not for the total number of SNPs but for the number of the independent segments in the genome.
Genome_Assembly_ARS_UCD_1_2 <- read_tsv("/home/bambrozi/2_CORTISOL/GWAS/sequence_report_ARS-UCD1_2.tsv")
library(dplyr)
# Filter the rows and sum the Seq length column
# Assuming your data frame is named Genome_Assembly_ARS_UCD_1_2
L <- Genome_Assembly_ARS_UCD_1_2 %>%
filter(`UCSC style name` %in% paste0("chr", 1:29)) %>%
summarise(total_length = sum(`Seq length`)) %>%
pull(total_length)
# Converting bases to Morgan (1Mb = 1cM (0,01 Morgan))
L_M <- L/10^8
# The Ne measure is based on the article bellow:
Ne <- 66 #(Makanjoula et al., 2020)
NeL <- Ne*L_M
# This is the number of independent segment in the genome.
Me <- (2*NeL)/log10(NeL)
gwas<- read.table(file = "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/GWAS_result_NO_CONSTRAIN.loco.mlma",
head=T, stringsAsFactors = F)
gwas$Chr<- as.factor(gwas$Chr)
gwas$logP<- -log10(gwas$p)
rmv<- which(gwas$logP == "NaN")
if (length(rmv) >=1) {gwas <- gwas[-rmv,]}
bonf<- -log10(0.15/Me)
library(GHap)
ghap.manhattan(data=gwas,chr="Chr", bp="bp", y="logP", type="p", pch = 20,
cex=1, lwd=1, ylab="", xlab="Chromossomes",
main="GWAS Cortisol", backcolor="#F5EFE780", chr.ang=0,)
abline(h=(bonf), col="red2")
legend("topright", col="red2", lwd=2, c("Bonferroni corr. for ind. segments"), bty="n")
library(tidyverse)
SNP_sig_bonf_ind_seg <- filter(gwas, logP > bonf)
write.table(SNP_sig_bonf_ind_seg, file = "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/SNP_sig15_Bonf_ind_seg.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
write.csv(SNP_sig_bonf_ind_seg, "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/SNP_sig15_Bonf_ind_seg.txt")
The SNPs below are the significant after the correction above
| X | Chr | SNP | bp | A1 | A2 | Freq | b | se | p | logP |
|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 3 | BTB-00148619 | 105840426 | A | C | 0.321429 | 410.739 | 104.530 | 0.0000852 | 4.069736 |
| 2 | 3 | ARS-BFGL-NGS-118207 | 110866523 | A | C | 0.421429 | 402.730 | 103.634 | 0.0001019 | 3.991928 |
| 3 | 22 | ARS-BFGL-NGS-23411 | 5134078 | A | C | 0.214286 | 415.637 | 109.100 | 0.0001391 | 3.856526 |
Bellow we inserted the rsID searched at SNPCHIMP
| X | Chr | rsID | SNP | bp | A1 | A2 | Freq | b | se | p | logP |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 3 | rs43357861 | BTB-00148619 | 105840426 | A | C | 0.321429 | 410.739 | 104.530 | 0.0000852 | 4.069736 |
| 2 | 3 | rs110081798 | ARS-BFGL-NGS-118207 | 110866523 | A | C | 0.421429 | 402.730 | 103.634 | 0.0001019 | 3.991928 |
| 3 | 22 | rs110031217 | ARS-BFGL-NGS-23411 | 5134078 | A | C | 0.214286 | 415.637 | 109.100 | 0.0001391 | 3.856526 |
# GALLO
#import a QTL annotation file
qtl_UCD1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Animal_QTLdb_release53_cattleARS_UCD1.gff.gz",file_type="gff")
#import a gene annotation file
gene_UDC1_2 <- import_gff_gtf(db_file="/home/bambrozi/2_CORTISOL/GALLO/Bos_taurus.ARS-UCD1.2.110.gtf.gz",file_type="gtf")
#import MARKER files = the GWAS output
gwas <- read.csv(file = "/home/bambrozi/2_CORTISOL/GWAS/EXTREM_PHENO_BY_SD/SNP_sig15_Bonf_ind_seg.csv")
# Assuming "gwas" is your dataframe
gwas <- subset(gwas, select = c(Chr, SNP, bp))
colnames(gwas) <- c("CHR","SNP", "BP")
#FINDING GENES AND QTLs ARROUND THE MARKER
#FINDING GENES
out.genes <- find_genes_qtls_around_markers(db_file= gene_UDC1_2,
marker_file= gwas,
method = "gene",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_genes_extr_15.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
write.csv(out.genes, file = "/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_genes_extr_15.csv")
#FINDING QTLs
out.qtl <- find_genes_qtls_around_markers(db_file= qtl_UCD1_2,
marker_file= gwas,
method = "qtl",
marker = "snp",
interval = 50000,
nThreads = NULL)
write.table(out.qtl, file = "/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_qtl_extr_15.txt",
quote = FALSE, sep = "\t", row.names = FALSE, col.names = T)
library(tidyverse)
out.qtl.clean <- select(out.qtl, c("CHR", "SNP", "BP", "QTL_type", "start_pos", "end_pos","QTL_ID"))
write.csv(out.qtl.clean, file = "/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_qtl_extr_15.csv")
The GALLO output are bellow:
For GENES
| X | CHR | SNP | BP | chr | start_pos | end_pos | width | strand | gene_id | gene_name | gene_biotype |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 1 | 3 | BTB-00148619 | 105840426 | 3 | 105807211 | 105810027 | 2817 | + | ENSBTAG00000020622 | COL9A2 | lncRNA |
| 2 | 3 | BTB-00148619 | 105840426 | 3 | 105824376 | 105867157 | 42782 | - | ENSBTAG00000044178 | ZMPSTE24 | protein_coding |
| 3 | 3 | BTB-00148619 | 105840426 | 3 | 105884724 | 105887360 | 2637 | - | ENSBTAG00000019698 | TMCO2 | protein_coding |
| 4 | 3 | ARS-BFGL-NGS-118207 | 110866523 | 3 | 110769753 | 110827938 | 58186 | + | ENSBTAG00000013867 | DLGAP3 | protein_coding |
| 5 | 3 | ARS-BFGL-NGS-118207 | 110866523 | 3 | 110835276 | 110839536 | 4261 | + | ENSBTAG00000014235 | SMIM12 | protein_coding |
| 6 | 3 | ARS-BFGL-NGS-118207 | 110866523 | 3 | 110888198 | 110890904 | 2707 | - | ENSBTAG00000013881 | GJA4 | protein_coding |
| 7 | 3 | ARS-BFGL-NGS-118207 | 110866523 | 3 | 110900183 | 110905849 | 5667 | - | ENSBTAG00000012584 | GJB3 | protein_coding |
| 8 | 22 | ARS-BFGL-NGS-23411 | 5134078 | 22 | 5091037 | 5184321 | 93285 | + | ENSBTAG00000019832 | TGFBR2 | protein_coding |
FOR QTLs
| X | CHR | SNP | BP | QTL_type | start_pos | end_pos | QTL_ID |
|---|---|---|---|---|---|---|---|
| 1 | 3 | BTB-00148619 | 105840426 | Production | 105835011 | 105835015 | 287197 |
| 2 | 3 | ARS-BFGL-NGS-118207 | 110866523 | Meat_and_Carcass | 110866521 | 110866525 | 152332 |
| 3 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5109498 | 5109502 | 151609 |
| 4 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5134076 | 5134080 | 151620 |
| 5 | 22 | ARS-BFGL-NGS-23411 | 5134078 | Meat_and_Carcass | 5173197 | 5173201 | 151601 |
#QTL enrichment analysis
out.enrich_qtl_name <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "Name",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
# Sorting the dataframe in ascending order of adj.pval
sorted_df <- out.enrich_qtl_name[order(out.enrich_qtl_name$adj.pval), ]
write.csv(sorted_df,"/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_enrich_qtl_genome_name_extr_15.csv")
out.enrich_qtl_type <-qtl_enrich(qtl_db= qtl_UCD1_2,
qtl_file= out.qtl, qtl_type = "QTL_type",
enrich_type = "genome", chr.subset = NULL,
padj = "fdr",nThreads = 2)
sorted_df_type <- out.enrich_qtl_type[order(out.enrich_qtl_type$adj.pval), ]
write.csv(out.enrich_qtl_type,"/home/bambrozi/2_CORTISOL/GALLO/EXTREM_PHENO_BY_SD/out_enrich_qtl_genome_type_extr_15.csv")
#Plots
#Name
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_name$ID<-paste(out.enrich_qtl_name$QTL," - ","CHR",out.enrich_qtl_name$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered<-out.enrich_qtl_name[which(out.enrich_qtl_name$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered, x="ID", pval="adj.pval")
#Type
#Creating a new ID composed by the trait and the chromosome
out.enrich_qtl_type$ID<-paste(out.enrich_qtl_type$QTL," - ","CHR",out.enrich_qtl_type$CHR,sep="")
#Match the QTL classes and filtering the Reproduction related QTLs
out.enrich.filtered_type<-out.enrich_qtl_type[which(out.enrich_qtl_type$adj.pval<0.05),]
#Plotting the enrichment results for the QTL enrichment analysis
dev.off()
QTLenrich_plot(out.enrich.filtered_type, x="ID", pval="adj.pval")
QTL Enrichment outcomes
Enrichment by name (enrichment analysis will be performed for each trait individually)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval | QTL_type |
|---|---|---|---|---|---|---|---|---|
| 2 | Muscle carnosine content | 3 | 67 | 5 | 163224 | 0.0000000 | 0.0000000 | Meat and Carcass |
| 3 | Polyunsaturated fatty acid content | 1 | 28 | 5 | 163224 | 0.0008574 | 0.0012861 | Meat and Carcass |
| 1 | Body depth | 1 | 616 | 5 | 163224 | 0.0187281 | 0.0187281 | Production |
Enrichment by QTL_type (enrichment processes performed for the QTL classes)
| X | QTL | N_QTLs | N_QTLs_db | Total_annotated_QTLs | Total_QTLs_db | pvalue | adj.pval |
|---|---|---|---|---|---|---|---|
| 1 | Meat and Carcass | 4 | 18258 | 5 | 163224 | 0.0007125 | 0.0014251 |
| 2 | Production | 1 | 19640 | 5 | 163224 | 0.4732476 | 0.4732476 |
From the online version of GPROFILER i got the following results.
Legend
\[ H_3 = \begin{pmatrix} 14 & 15 \\ 16 & 17 \\ 18 & 19 \\ 20 & 21 \end{pmatrix} \]